-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate_entries.py
executable file
·78 lines (65 loc) · 3.75 KB
/
update_entries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# Copyright (C) 2023
# Petrakopoulos Vasilis
# EIDA Technical Committee @ National Observatory of Athens, Greece
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# This script updates the WFCatalog entries of files that have inconsistent checksums or a creation date older than the time they were last modified in the archive.
# The script reads these files from the tables "inconsistent_checksum" and "older_date" of the
# "inconsistencies_results.db" SQLite database file, which is produced by executing the "check_consistency.py" script.
# Execute the script only AFTER ensuring that the paths and collector options (set right below the import statements) match your system.
import sqlite3
import os
import json
import subprocess
import logging
# ---- settings: adjust the values below to match your system ----

# directory of the WFCatalog collector; the WFCC_COLLECTOR_DIR environment variable, if set, overrides the default
wfcCollectorDir = os.environ.get('WFCC_COLLECTOR_DIR', '/home/Programs/wfcatalog/collector')

# Python interpreter of the WFCatalog collector virtual environment
wfcCollectorEnv = wfcCollectorDir + '/.env/bin/python'

# config.json file of the WFCatalog collector
wfcConfigFile = wfcCollectorDir + '/config.json'

# the WFCatalogCollector.py script
wfcCollector = wfcCollectorDir + '/WFCatalogCollector.py'

# options used when executing the WFCatalogCollector.py script
collectorOptions = [
    '--flags',
    '--csegs',
    '--update',
    '--force',
    '--list',
]

# number of files handed to the collector per execution; batching keeps the
# command below the bash command size limit
batch_size = 500

# if desired, modify the call below to output logging details to a specified file
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s')
def _fetch_file_names(db_path):
    """Return the distinct file names listed in the inconsistency tables.

    The names are read from the "inconsistent_checksum" and "older_date"
    tables of the SQLite database at *db_path*; UNION removes duplicates.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        rows = cursor.execute('SELECT fileName FROM inconsistent_checksum UNION SELECT fileName FROM older_date').fetchall()
    finally:
        # close the connection even if the query fails (e.g. a table is missing)
        conn.close()
    return [row[0] for row in rows]


def _swap_white_filter(config_path, white):
    """Write *white* as FILTERS/WHITE into the collector config and return the value it replaced."""
    with open(config_path, 'r') as config_file:
        config = json.load(config_file)
    previous = config["FILTERS"]["WHITE"]
    config["FILTERS"]["WHITE"] = white
    with open(config_path, 'w') as config_file:
        json.dump(config, config_file, indent=2)
    return previous


def _run_collector(collector_command, file_names, size):
    """Execute *collector_command* once per batch of at most *size* file names.

    Each batch is appended to the command as a single JSON-encoded list argument.
    """
    for number, start in enumerate(range(0, len(file_names), size), start=1):
        batch = file_names[start:start + size]
        logging.info(f"Execute WFCatalog collector for batch {number}")
        result = subprocess.run(collector_command + [json.dumps(batch)])
        if result.returncode != 0:
            # do not abort: the remaining batches are independent of this one
            logging.warning(f"WFCatalog collector exited with status {result.returncode} for batch {number}")


def update_entries(db_path, config_path, collector_command, size):
    """Re-run the WFCatalog collector for every file flagged in *db_path*.

    While the collector runs, the WHITE filter in *config_path* is set to
    ["*"] so that no flagged file is excluded; the previous filter is written
    back afterwards, also when the run is interrupted or fails.

    Parameters:
        db_path: path of the SQLite database with the inconsistency tables.
        config_path: path of the collector's config.json file.
        collector_command: argument list (interpreter, script, options) to
            which the JSON-encoded batch of file names is appended.
        size: maximum number of file names passed per collector execution.

    Raises:
        Any exception raised while reading the database or the config file,
        or while starting the collector, is propagated after the WHITE
        filter has been restored. KeyboardInterrupt during the batch run is
        handled here: the remaining batches are skipped.
    """
    logging.info("Retrieving names of files to be updated in WFCatalog")
    file_names = _fetch_file_names(db_path)
    if not file_names:
        # nothing to do, so leave config.json untouched
        logging.info("No files to be updated in WFCatalog")
        return

    # ensure that the WHITE filter does not exclude files to be added
    logging.info("Write to config.json of WFCatalog collector")
    old_white = _swap_white_filter(config_path, ["*"])
    try:
        _run_collector(collector_command, file_names, size)
    except KeyboardInterrupt:
        # stop processing; the finally clause below still restores config.json
        logging.warning("Interrupted, skipping the remaining batches")
    finally:
        # undo the changes in the config.json file, whatever happened above
        logging.info("Undo changes to config.json")
        _swap_white_filter(config_path, old_white)


# run the update only if the SQLite database exists in the current working directory
if os.path.exists(os.path.join(os.getcwd(), 'inconsistencies_results.db')):
    update_entries(
        'inconsistencies_results.db',
        wfcConfigFile,
        [wfcCollectorEnv, wfcCollector] + collectorOptions,
        batch_size,
    )
else:
    logging.warning("inconsistencies_results.db not found in the current working directory, nothing to update")