import os
import re
import shutil
from collections import defaultdict
# Minimum combined size, in bytes, for a run to count as good (5 GiB).
THRESHOLD = 5 * 1024 * 1024 * 1024

# Directory that receives the files of undersized runs.
# NOTE(review): the original assignment of BAD_DIR is not visible in this
# excerpt; "bad_runs" is a placeholder value -- confirm against the real script.
BAD_DIR = "bad_runs"

# Expected file name layout: run_<label>_<run number>_ev<N>_p<N>.data
pattern = r'^run_(.*)_(\d+)_ev\d+_p\d+\.data$'
_RUN_FILE_RE = re.compile(pattern)


def list_run_files(directory='.'):
    """Return the names of regular files in *directory* that look like run data.

    A candidate starts with ``run_`` and ends with ``.data``; the stricter
    pattern check happens later in :func:`group_files_by_run`.
    """
    return [
        name
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
        and name.startswith('run_')
        and name.endswith('.data')
    ]


def group_files_by_run(filenames):
    """Group *filenames* by the run number embedded in each name.

    Names that do not match ``pattern`` are reported on stdout and left out.
    The label part of the name is ignored: files are keyed by run number only.

    Returns:
        A ``defaultdict(list)`` mapping ``int`` run number -> list of names.
    """
    run_groups = defaultdict(list)
    for name in filenames:
        match = _RUN_FILE_RE.match(name)
        if match:
            run_groups[int(match.group(2))].append(name)
        else:
            print(f"Skipping file: {name} (does not match expected pattern)")
    return run_groups


def move_bad_runs(directory='.', bad_dir=BAD_DIR, threshold=THRESHOLD):
    """Move every run whose files total less than *threshold* bytes to *bad_dir*.

    Args:
        directory: Directory scanned for run files.
        bad_dir: Destination for the files of undersized runs; created if it
            does not exist yet.
        threshold: Minimum combined size in bytes for a run to be kept.

    Returns:
        The number of files that were moved.
    """
    if not os.path.exists(bad_dir):
        os.makedirs(bad_dir)
        print(f"Created directory: {bad_dir}")

    run_groups = group_files_by_run(list_run_files(directory))

    moved_count = 0
    for run_num, files in sorted(run_groups.items()):
        total_size = sum(
            os.path.getsize(os.path.join(directory, name)) for name in files
        )
        num_files = len(files)

        if total_size < threshold:
            print(f"Bad run {run_num}: {num_files} files, {total_size:,} bytes (< {threshold:,} bytes) → moving to {bad_dir}/")
            for name in files:
                shutil.move(
                    os.path.join(directory, name),
                    os.path.join(bad_dir, name),
                )
                moved_count += 1
        else:
            print(f"Good run {run_num}: {num_files} files, {total_size:,} bytes")

    print(f"\nProcessing complete. Moved {moved_count} files to '{bad_dir}'.")
    return moved_count


if __name__ == "__main__":
    move_bad_runs()