Skip to content

Commit 25d44ab

Browse files
author
7048730
committed
upd broker
1 parent d708922 commit 25d44ab

11 files changed

+99
-106
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# specific
22
extract/tests/all
3+
broker/tests/all
34
metadata_raw.json
5+
metadata_o2r.json
46

57
# Byte-compiled / optimized / DLL files
68
__pycache__/

.travis.yml

+1
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ install:
1919
- pip install -r requirements.txt
2020
script:
2121
- python o2rmeta.py -debug extract -i extract/tests -o extract/tests -xo
22+
- python o2rmeta.py -debug validate -s schema/json/o2r-meta-schema.json -c schema/json/example1-valid.json

README.md

+19-5
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ Each tool then has a number of required arguments:
6161

6262
#(1) Extractor tool:
6363

64-
python o2rmeta extract -i <INPUT_DIR> -s|-o <OUTPUT_DIR> [-xo] [-m] [-xml] [-ercid <ERC_ID>]
64+
python o2rmeta.py extract -i <INPUT_DIR> -s|-o <OUTPUT_DIR> [-xo] [-m] [-xml] [-ercid <ERC_ID>]
6565

6666
Example call:
6767

68-
python o2rmeta extract -i extract/tests -o extract/tests -xo
68+
python o2rmeta.py -debug extract -i extract/tests -o extract/tests -xo
6969

7070
Explanation of the switches:
7171

@@ -79,15 +79,29 @@ Explanation of the switches:
7979

8080

8181
#(2) broker
82-
TDB
82+
83+
python o2rmeta.py broker -i <INPUT_DIR/FILE> -m <MAPPING_FILE> -s|-o <OUTPUT_DIR>
84+
85+
Example call:
86+
87+
python o2rmeta.py -debug broker -i broker/tests -m broker/mappings/o2r-map.json -o broker/tests/all
88+
89+
Explanation of the switches:
90+
91+
+ `-i` <INPUT_DIR> : required path to the input directory that is scanned (non-recursively) for parsable metadata files.
92+
+ `-m` <MAPPING_FILE> : required path to a JSON mapping file that holds the translation instructions for the metadata mapping. TBD: JSON schema for mapping files.
93+
+ `-s`: option to print the results to the console. This switch is mutually exclusive with `-o`. Exactly one of them must be given.
94+
+ `-o` <OUTPUT_DIR> : output path where the data should be saved. If the directory does not exist, it will be created at runtime. This switch is mutually exclusive with `-s`. Exactly one of them must be given.
95+
96+
8397

8498
#(3) Validator tool:
8599

86-
python o2rmeta validate -s <SCHEMA> -c <CANDIDATE>
100+
python o2rmeta.py validate -s <SCHEMA> -c <CANDIDATE>
87101

88102
Example call:
89103

90-
python o2rmeta validate -s schema/json/o2r-meta-schema.json -c schema/json/example1-valid.json
104+
python o2rmeta.py -debug validate -s schema/json/o2r-meta-schema.json -c schema/json/example1-valid.json
91105

92106
Explanation of the switches:
93107

broker/Dockerfile

-11
This file was deleted.

broker/mappings/o2r-map.json

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"Settings": {
33
"name": "o2r",
4+
"outputfile": "metadata_o2r",
45
"map_description": "maps raw extracted metadata to o2r schema compliant metadata",
56
"mode": "json",
67
"root": ""

broker/metabroker.py

+62-80
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def do_outputs(output_data, out_mode, out_name, file_ext):
3434
else:
3535
try:
3636
# output path is given in <out_mode>
37-
output_filename = os.path.join(out_mode, '_'.join((out_name, file_ext)))
37+
output_filename = os.path.join(out_mode, ''.join((out_name, file_ext)))
3838
if not os.path.exists(out_mode):
3939
os.makedirs(out_mode)
4040
with open(output_filename, 'w', encoding='utf-8') as outfile:
@@ -191,87 +191,69 @@ def map_xml(element, value, map_data, xml_root):
191191

192192

193193
def status_note(msg):
194-
print(''.join(('[metabroker] ', msg)))
194+
print(''.join(('[o2rmeta][broker] ', msg)))
195195

196196

197197
# Main
198-
if __name__ == "__main__":
199-
if sys.version_info[0] < 3:
200-
# py2
201-
status_note('requires py3k or later')
202-
sys.exit()
198+
def start(**kwargs):
199+
input_dir = kwargs.get('i', None)
200+
output_dir = kwargs.get('o', None)
201+
output_to_console = kwargs.get('s', None)
202+
seperator = '#' #<-- make this generic
203+
my_map = kwargs.get('m', None)
204+
# output mode
205+
if output_to_console:
206+
output_mode = '@s'
207+
elif output_dir:
208+
output_mode = output_dir
209+
if not os.path.isdir(output_dir):
210+
status_note(''.join(('directory at <', output_dir, '> will be created during extraction...')))
203211
else:
204-
my_version = 1
205-
my_mod = ''
206-
try:
207-
my_mod = datetime.datetime.fromtimestamp(os.stat(__file__).st_mtime)
208-
except OSError:
209-
pass
210-
status_note(''.join(('v', str(my_version), ' - ', str(my_mod))))
211-
parser = argparse.ArgumentParser(description='description')
212-
parser.add_argument('-m', '--map', help='name of the mapping file', required=True)
213-
parser.add_argument('-i', '--inputdir', help='input directory', required=True)
214-
group = parser.add_mutually_exclusive_group(required=True)
215-
group.add_argument('-o', '--outputdir', help='output directory for extraction docs')
216-
group.add_argument('-s', '--outputtostdout', help='output the result of the extraction to stdout', action='store_true', default=False)
217-
args = parser.parse_args()
218-
args_dict = vars(args)
219-
input_dir = args_dict['inputdir']
220-
output_dir = args_dict['outputdir']
221-
output_to_console = args_dict['outputtostdout']
222-
seperator = '#' #<-- make this generic
223-
my_map = args_dict['map']
224-
# output mode
225-
if output_to_console:
226-
output_mode = '@s'
227-
elif output_dir:
228-
output_mode = output_dir
229-
if not os.path.isdir(output_dir):
230-
status_note(''.join(('directory <', output_dir, '> will be created during extraction...')))
231-
else:
232-
# not possible currently because output arg group is on mutual exclusive
233-
output_mode = '@none'
212+
# currently not reachable via the command line, because the output argument group is required and mutually exclusive
213+
output_mode = '@none'
234214

235-
# open map file and find out mode
236-
try:
237-
with open(os.path.join('mappings', my_map), encoding='utf-8') as data_file:
238-
map_file = json.load(data_file)
239-
settings_data = map_file['Settings']
240-
map_data = map_file['Map']
241-
my_mode = settings_data['mode']
242-
except:
243-
raise
244-
# distinguish format for output
245-
if my_mode == 'json':
246-
for file in os.listdir(input_dir):
247-
if os.path.basename(file).startswith('meta_'):
248-
json_output = {}
249-
with open(os.path.join(input_dir, file), encoding='utf-8') as data_file:
250-
test_data = json.load(data_file)
251-
for element in test_data:
252-
try:
253-
map_json(element, test_data[element], map_data, json_output)
254-
except:
255-
raise
256-
do_outputs(json_output, output_mode, 'o2r_'+os.path.splitext(file)[0], '.json')
257-
elif my_mode == 'txt':
258-
# to do: handle txt based maps like bagit
259-
txt_output = ''
260-
do_outputs(txt_output, output_mode, '.txt')
261-
elif my_mode == 'xml':
262-
root = ET.Element(settings_data['root'])
263-
# to do: generify for complex xml maps
264-
root.set('xmlns', settings_data['root@xmlns'])
265-
root.set('xmlns:xsi', settings_data['root@xmlns:xsi'])
266-
root.set('xsi:schemaLocation', settings_data['root@xsi:schemaLocation'])
267-
with open(os.path.join('tests', 'meta_test1.json'), encoding='utf-8') as data_file:
268-
test_data = json.load(data_file)
269-
for element in test_data:
270-
try:
271-
map_xml(element, test_data[element], map_data, root)
272-
except:
273-
raise
274-
output = ET.tostring(root, encoding='utf8', method='xml')
275-
do_outputs(minidom.parseString(output).toprettyxml(indent='\t'), output_mode, '.xml')
276-
else:
277-
print('[metabroker] ! error: cannot process map mode of <' + my_map + '>')
215+
# open map file and find out mode
216+
try:
217+
with open(my_map, encoding='utf-8') as data_file:
218+
map_file = json.load(data_file)
219+
settings_data = map_file['Settings']
220+
map_data = map_file['Map']
221+
my_mode = settings_data['mode']
222+
except:
223+
raise
224+
# distinguish format for output
225+
if my_mode == 'json':
226+
# try parse all possible metadata files:
227+
for file in os.listdir(input_dir):
228+
if os.path.basename(file).startswith('metadata_'):
229+
json_output = {}
230+
with open(os.path.join(input_dir, file), encoding='utf-8') as data_file:
231+
test_data = json.load(data_file)
232+
for element in test_data:
233+
try:
234+
map_json(element, test_data[element], map_data, json_output)
235+
except:
236+
raise
237+
##do_outputs(json_output, output_mode, 'o2r_'+os.path.splitext(file)[0], '.json')
238+
do_outputs(json_output, output_mode, settings_data['outputfile'], '.json')
239+
elif my_mode == 'txt':
240+
# to do: handle txt based maps like bagit
241+
txt_output = ''
242+
do_outputs(txt_output, output_mode, '.txt')
243+
elif my_mode == 'xml':
244+
root = ET.Element(settings_data['root'])
245+
# to do: generify for complex xml maps
246+
root.set('xmlns', settings_data['root@xmlns'])
247+
root.set('xmlns:xsi', settings_data['root@xmlns:xsi'])
248+
root.set('xsi:schemaLocation', settings_data['root@xsi:schemaLocation'])
249+
with open(os.path.join('tests', 'meta_test1.json'), encoding='utf-8') as data_file:
250+
test_data = json.load(data_file)
251+
for element in test_data:
252+
try:
253+
map_xml(element, test_data[element], map_data, root)
254+
except:
255+
raise
256+
output = ET.tostring(root, encoding='utf8', method='xml')
257+
do_outputs(minidom.parseString(output).toprettyxml(indent='\t'), output_mode, '.xml')
258+
else:
259+
status_note('! error: cannot process map mode of <' + my_map + '>')

broker/requirements.txt

Whitespace-only changes.
File renamed without changes.

extract/metaextract.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -342,18 +342,14 @@ def start(**kwargs):
342342
rule_set_rmd_multiline = ['\t'.join(('yaml', r'---\n(.*?)\n---\n')),
343343
'\t'.join(('rblock', r'\`{3}(.*)\`{3}'))]
344344
# other parameters
345-
##global packlist_geosci
346-
##packlist_geosci = 'extract/list_geosci.txt'
347-
##global packlist_crantop100
348-
##packlist_crantop100 = 'extract/list_crantop100.txt'
349345
nr = 0 # number of files processed
350346
if skip_orcid:
351347
status_note('orcid api search disabled...')
352348
global md_paper_source
353349
md_paper_source = ''
354350
# md_bbox_list = {}
355351
global MASTER_MD_DICT
356-
MASTER_MD_DICT = {} # todo: this one is being updated per function call
352+
MASTER_MD_DICT = {} # this one is being updated per function call
357353
bagit_txt_file = None
358354
global compare_extracted
359355
compare_extracted = {} # dict for evaluations to find best metafile for main output

o2rmeta.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def status_note(msg, **kwargs):
3939

4040
# Main
4141
if __name__ == "__main__":
42-
if sys.version_info[0] < 3 and sys.version_info[1] < 6: # target py36
43-
status_note('requires python 3.6')
42+
if sys.version_info[0] < 3 and sys.version_info[1] < 4: # target py36
43+
status_note('requires python 3.4')
4444
sys.exit(0)
4545
else:
4646
# arg parse setup:
@@ -61,6 +61,14 @@ def status_note(msg, **kwargs):
6161
extractor.add_argument('-m', '--metafiles', help='output all metafiles', action='store_true', default=False,
6262
required=False)
6363
# - - - - - - - - - - - - - - - - - -
64+
broker = subparsers.add_parser("broker")
65+
broker.add_argument("-i", "--inputdir", type=str, required=True)
66+
broker.add_argument("-m", "--map", type=str, required=True)
67+
group = broker.add_mutually_exclusive_group(required=True)
68+
group.add_argument('-o', '--outputdir', help='output directory for brokering docs')
69+
group.add_argument('-s', '--outputtostdout', help='output the result of the brokering to stdout',
70+
action='store_true', default=False)
71+
# - - - - - - - - - - - - - - - - - -
6472
validator = subparsers.add_parser("validate")
6573
validator.add_argument("-s", "--schema", type=str, required=True)
6674
validator.add_argument("-c", "--candidate", type=str, required=True)
@@ -74,7 +82,7 @@ def status_note(msg, **kwargs):
7482
except OSError:
7583
pass
7684
status_note(''.join(('v', str(my_version), ' - ', str(my_mod))), debug=argsd['debug'])
77-
status_note(''.join(('running under python ', str(sys.version_info[0]), '.' , str(sys.version_info[1]), '.', str(
85+
status_note(''.join(('running under python ', str(sys.version_info[0]), '.', str(sys.version_info[1]), '.', str(
7886
sys.version_info[2]))), debug=argsd['debug'])
7987
status_note(''.join(('received arguments: ', str(argsd))), debug=argsd['debug'])
8088
try:
@@ -83,7 +91,7 @@ def status_note(msg, **kwargs):
8391
metaextract.start(i=argsd['inputdir'], o=argsd['outputdir'], s=argsd['outputtostdout'], xo=argsd['skiporcid'], e=argsd['ercid'], m=argsd['metafiles'], xml=argsd['modexml'])
8492
elif argsd['tool'] == "broker":
8593
status_note('launching broker')
86-
print('TBD') # todo
94+
metabroker.start(i=argsd['inputdir'], o=argsd['outputdir'], s=argsd['outputtostdout'], m=argsd['map'])
8795
elif argsd['tool'] == "validate":
8896
status_note('launching validator')
8997
metavalidate.start(s=argsd['schema'], c=argsd['candidate'])

validate/metavalidate.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def xml_validate(c, s, bln_c_http, bln_s_http):
7575

7676

7777
def status_note(msg):
78-
print(''.join(('[validate] ', msg)))
78+
print(''.join(('[o2rmeta][validate] ', msg)))
7979

8080

8181
# main

0 commit comments

Comments
 (0)