# Let INFO-level (and more severe) messages through the root logger.
logging.getLogger().setLevel(logging.INFO)
14 parser = argparse.ArgumentParser(
15 description=
'Script for matching raw file metadata with the File Catalogue\
16 For more information run with --help option'
31 choices=[
'ddc',
'ncx',
'cicc'],
32 help=
'Name of the storage to write raw file metadata to the File Catalogue',
39 help=
'Remove file metadata in the File Catalogue if there is no corresponding file on the storage'
42 args = parser.parse_args()
44 run_number, storage_name, force_delete = args.run, args.storage, args.delete
46 return run_number[0],storage_name[0],force_delete
# Read the run number, the storage name and the delete flag from the command line.
run_number, storage_name, force_delete = argument_parser()
logging.info(
    "Matching raw file metadata with the File Catalogue started for '%s' storage",
    storage_name)

# Load the script configuration from file_catalogue.json (looked up relative to
# the current working directory).  The context manager closes the file handle
# as soon as the JSON has been parsed instead of leaving it to the garbage
# collector.
with open("file_catalogue.json") as config_file:
    config = json.load(config_file)
# Open the PostgreSQL connection with the credentials taken from the config.
# Keyword arguments are used instead of a hand-formatted DSN string so that
# values containing spaces or quotes (e.g. a password) are passed through
# intact.
# NOTE(review): the `try:` and `except ... as e:` lines are missing from this
# listing; the exception class used below is an assumption -- confirm it
# against the original file, together with whatever followed the error log.
try:
    conn = psycopg2.connect(
        dbname=config["db_name"],
        user=config["db_user"],
        host=config["db_host"],
        password=config["db_pass"],
    )
except psycopg2.Error as e:
    logging.error("ERROR: Database connection failed: %s", e)
# Look up the storage record (id, path, xrootd URL) of the given run on the
# requested storage.  The values are handed to the driver as query parameters
# rather than being formatted into the SQL text.
cursor.execute(
    "select rs.run_storage_id, rs.storage_path, s.xrootd_url "
    "from run_storage rs join storage_ s on rs.storage_name = s.storage_name "
    "where rs.run_number=%s and rs.storage_name=%s",
    (run_number, storage_name))

# fetchone() returns None when no row matches; unpacking None directly would
# raise TypeError before the error below could be reported, so fall back to
# empty values in that case.
storage_row = cursor.fetchone()
run_storage_id, storage_path, xrootd_url = (
    storage_row if storage_row is not None else (None, None, None))
if storage_path is None:
    logging.error(
        "ERROR: No storage information (%s) found in the database for Run%s",
        storage_name, run_number)
# Fetch every File Catalogue record attached to this run/storage pair.  The
# constant 0 adds a fifth column to each row (unpacked further down as
# catalogue_file_matched).  The id is passed as a query parameter.
cursor.execute(
    "select fc.file_guid, fc.file_path, fc.file_size, fc.file_checksum, 0 "
    "from file_catalogue fc join run_storage rs on fc.run_storage_id = rs.run_storage_id "
    "where rs.run_storage_id=%s",
    (run_storage_id,))
catalogue_files = cursor.fetchall()

# One flag per catalogue record, all starting at 0; the matching loop sets a
# flag to 1 and the final loop reports records whose flag is still 0.
catalogue_matches = [0] * len(catalogue_files)

# Recursively list the entries found below the storage path.
all_files = glob.glob(storage_path + '/**/*', recursive=True)
90for cur_file
in all_files:
91 logging.debug(
"Current file '%s'" % (cur_file))
95 for file_record
in catalogue_files:
96 catalogue_file_guid, catalogue_file_path, catalogue_file_size, catalogue_file_checksum, catalogue_file_matched = file_record
97 if catalogue_file_path == cur_file:
98 catalogue_matches[i] = 1
105 if file_record
is not None:
106 logging.debug(
'The current file exists')
107 catalogue_file_guid, catalogue_file_path, catalogue_file_size, catalogue_file_checksum, catalogue_file_matched = file_record
109 if catalogue_file_size
is None:
111 file_size = os.path.getsize(cur_file)
112 cursor.execute(
"update file_catalogue set file_size=%d where file_guid=%d" % (file_size, catalogue_file_guid))
114 logging.debug(
'The size of the file has been updated in the File Catalogue')
115 except OSError
as error:
116 logging.error(
"ERROR: File is not accessible to get its size: %s" % (cur_file))
118 if catalogue_file_checksum
is None:
120 if xrootd_url
is None:
124 if (file_checksum !=
''):
125 cursor.execute(
"update file_catalogue set file_checksum=%s where file_guid=%d" % (file_checksum, catalogue_file_guid))
127 logging.debug(
'The checksum of the file has been updated in the File Catalogue')
129 logging.info(
"Adding file '%s' to the File Catalogue" % (cur_file))
131 file_size = os.path.getsize(cur_file)
132 except OSError
as error:
133 logging.error(
"ERROR: File is not accessible to get its size: %s" % (cur_file))
136 if xrootd_url
is None:
140 if (file_checksum ==
''):
143 logging.debug(
'Inserting a new file record with file path = "%s", file size = %d, file_checksum = %s' % (cur_file, file_size, file_checksum))
144 cursor.execute(
"insert into file_catalogue(run_storage_id, file_path, file_size, file_checksum) "
145 "values (%d, '%s', %d, '%s')" % (run_storage_id, cur_file, file_size, file_checksum))
147 logging.info(
"Information on the current file has been successfullly added\n")
151for file_record
in catalogue_files:
152 catalogue_file_guid, catalogue_file_path, catalogue_file_size, catalogue_file_checksum, catalogue_file_matched = file_record
153 if catalogue_matches[i] == 0:
154 logging.warning(
"WARNING: The record in the File Catalogue ('%s') has no corresponding file" % (catalogue_file_path))
156 cursor.execute(
"delete from file_catalogue where run_storage_id = %d and file_path = '%s'" % (run_storage_id, catalogue_file_path))
158 logging.info(
"The record on the absent file in the File Catalogue has been deleted")
# Final status message; the storage name is passed as a lazy logging argument.
logging.info(
    "Matching raw file metadata with the File Catalogue finished for '%s' storage",
    storage_name)
get_file_xrdsum(file_path, xrootd_url)
get_file_adler32c(file_path)