BmnRoot
Loading...
Searching...
No Matches
filter_raw_data Namespace Reference

Functions

 argument_parser ()
 
 size_human (num, suffix="B")
 

Variables

int MIN_RUN_SIZE = 500*1024*1024
 
int MAX_FILE_SIZE = 50*1024*1024*1024
 
bool filter_only_small = True
 
 log_level = logging.INFO
 
 format
 
 datefmt
 
 handlers
 
 level
 
 input_file_dir = argument_parser()
 
 all_files = glob.glob(input_file_dir + '/**/*', recursive=True)
 
 run_numbers = set(map(lambda x:"0" if not re.findall(r'\d+', os.path.basename(x)) else re.findall(r'\d+', os.path.basename(x))[0], all_files))
 
list correct_files = [[y for y in all_files if x in os.path.basename(y)] for x in run_numbers if x != "0"]
 
list wrong_files = [x for x in all_files if not re.findall(r'\d+', os.path.basename(x))]
 
 result_group_file = open("filter_raw_data.grp", 'w')
 
 result_single_file = open("filter_raw_data.lst", 'w')
 
bool small_file = False
 
int sum_group_size = 0
 
 file_size = os.path.getsize(cur_file)
 
 file
 
 part_numbers = set(map(lambda x:int(re.findall(r'\d+', os.path.basename(x))[-1]), cur_file_group))
 
 max_idx
 
 max_part_number
 
 key
 
bool is_not_last = False
 
 cur_part_number = re.findall(r'\d+', os.path.basename(cur_file))[-1]
 
list sub_cur_file_group = [x for x in cur_file_group if (re.findall(r'\d+', os.path.basename(x))[-1] == cur_part_number) and (x != cur_file)]
 
 sub_file_size = os.path.getsize(sub_cur_file)
 

Function Documentation

◆ argument_parser()

filter_raw_data.argument_parser()

Definition at line 35 of file filter_raw_data.py.

◆ size_human()

filter_raw_data.size_human(num, suffix="B")

Definition at line 54 of file filter_raw_data.py.

Variable Documentation

◆ all_files

filter_raw_data.all_files = glob.glob(input_file_dir + '/**/*', recursive=True)

Definition at line 66 of file filter_raw_data.py.

◆ correct_files

list filter_raw_data.correct_files = [[y for y in all_files if x in os.path.basename(y)] for x in run_numbers if x != "0"]

Definition at line 76 of file filter_raw_data.py.

◆ cur_part_number

filter_raw_data.cur_part_number = re.findall(r'\d+', os.path.basename(cur_file))[-1]

Definition at line 132 of file filter_raw_data.py.

◆ datefmt

filter_raw_data.datefmt

Definition at line 27 of file filter_raw_data.py.

◆ file

filter_raw_data.file

Definition at line 104 of file filter_raw_data.py.

◆ file_size

filter_raw_data.file_size = os.path.getsize(cur_file)

Definition at line 93 of file filter_raw_data.py.

◆ filter_only_small

bool filter_raw_data.filter_only_small = True

Definition at line 22 of file filter_raw_data.py.

◆ format

filter_raw_data.format

Definition at line 26 of file filter_raw_data.py.

◆ handlers

filter_raw_data.handlers

Definition at line 28 of file filter_raw_data.py.

◆ input_file_dir

filter_raw_data.input_file_dir = argument_parser()

Definition at line 61 of file filter_raw_data.py.

◆ is_not_last

bool filter_raw_data.is_not_last = False

Definition at line 124 of file filter_raw_data.py.

◆ key

filter_raw_data.key

Definition at line 123 of file filter_raw_data.py.

◆ level

filter_raw_data.level

Definition at line 32 of file filter_raw_data.py.

◆ log_level

filter_raw_data.log_level = logging.INFO

Definition at line 23 of file filter_raw_data.py.

◆ MAX_FILE_SIZE

int filter_raw_data.MAX_FILE_SIZE = 50*1024*1024*1024

Definition at line 20 of file filter_raw_data.py.

◆ max_idx

filter_raw_data.max_idx

Definition at line 123 of file filter_raw_data.py.

◆ max_part_number

filter_raw_data.max_part_number

Definition at line 123 of file filter_raw_data.py.

◆ MIN_RUN_SIZE

int filter_raw_data.MIN_RUN_SIZE = 500*1024*1024

Definition at line 19 of file filter_raw_data.py.

◆ part_numbers

filter_raw_data.part_numbers = set(map(lambda x:int(re.findall(r'\d+', os.path.basename(x))[-1]), cur_file_group))

Definition at line 122 of file filter_raw_data.py.

◆ result_group_file

filter_raw_data.result_group_file = open("filter_raw_data.grp", 'w')

Definition at line 81 of file filter_raw_data.py.

◆ result_single_file

filter_raw_data.result_single_file = open("filter_raw_data.lst", 'w')

Definition at line 82 of file filter_raw_data.py.

◆ run_numbers

filter_raw_data.run_numbers = set(map(lambda x:"0" if not re.findall(r'\d+', os.path.basename(x)) else re.findall(r'\d+', os.path.basename(x))[0], all_files))

Definition at line 73 of file filter_raw_data.py.

◆ small_file

bool filter_raw_data.small_file = False

Definition at line 87 of file filter_raw_data.py.

◆ sub_cur_file_group

list filter_raw_data.sub_cur_file_group = [x for x in cur_file_group if (re.findall(r'\d+', os.path.basename(x))[-1] == cur_part_number) and (x != cur_file)]

Definition at line 137 of file filter_raw_data.py.

◆ sub_file_size

filter_raw_data.sub_file_size = os.path.getsize(sub_cur_file)

Definition at line 140 of file filter_raw_data.py.

◆ sum_group_size

int filter_raw_data.sum_group_size = 0

Definition at line 89 of file filter_raw_data.py.

◆ wrong_files

list filter_raw_data.wrong_files = [x for x in all_files if not re.findall(r'\d+', os.path.basename(x))]

Definition at line 77 of file filter_raw_data.py.