Skip to content

Commit 41d9686

Browse files
author
7048730
committed
upd extract & broker
- fix description handling - upd broker input to take file path instead of dir - minor fixes - upd readme
1 parent c7e12ca commit 41d9686

File tree

6 files changed

+54
-46
lines changed

6 files changed

+54
-46
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ broker/tests/all
44
metadata_raw.json
55
metadata_o2r.json
66
metadata_zenodo.json
7+
metadata_eudat.json
8+
metadata_mods.json
79
testerc.yml
810
erc.yml
911
erc_raw.yml

README.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ o2r meta is designed for python 3.6 and supports python 3.4+.
3131
sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable -y
3232
sudo apt-get -qq update
3333
sudo apt-get install -y python3-dev
34+
sudo apt-get install -y libgdal1h
3435
sudo apt-get install -y libgdal-dev
3536
sudo apt-get build-dep -y python-gdal
3637
sudo apt-get install -y python-gdal
@@ -128,20 +129,20 @@ In _checking mode_ it returns missing metadata information for a target service
128129
Within o2r, the broker is used to translate between different standards for metadata sets. For example from extracted raw metadata to the o2r schema-compliant metadata. Other target outputs might be DataCite XML or Zenodo JSON.
129130
Translation instructions as well as checklists are stored in json formatted files.
130131

131-
python o2rmeta.py broker -i <INPUT_DIR/FILE> -c <CHECKLIST_FILE>|-m <MAPPING_FILE> -s|-o <OUTPUT_DIR>
132+
python o2rmeta.py broker -i <INPUT_FILE> -c <CHECKLIST_FILE>|-m <MAPPING_FILE> -s|-o <OUTPUT_DIR>
132133

133134
Example calls:
134135

135136
python o2rmeta.py -debug broker -c broker/checks/zenodo-check.json -i schema/json/example_zenodo.json -o broker/tests/all
136137

137-
python o2rmeta.py -debug broker -m broker/mappings/zenodo-map.json -i broker/tests -o broker/tests/all
138+
python o2rmeta.py -debug broker -m broker/mappings/zenodo-map.json -i broker/tests/metadata_raw.json -o broker/tests/all
138139

139140
Explanation of the switches:
140141

141142

142143
+ `-c` <CHECKLIST_FILE> : required path to a json checklist file that holds checking instructions for the metadata. This switch is mutually exclusive with `-m`. At least one of them must be given.
143144
+ `-m` <MAPPING_FILE> : required path to a json mapping file that holds translation instructions for the metadata mappings. This switch is mutually exclusive with `-c`. At least one of them must be given.
144-
+ `-i` <INPUT_DIR> : path to input json when using `-c`-mode _or_ required starting path for recursive search for parsable files when using `-m`-mode.
145+
+ `-i` <INPUT_FILE> : path to input json file.
145146
+ `-s`: option to print out results to console. This switch is mutually exclusive with `-o`. At least one of them must be given.
146147
+ `-o` <OUTPUT_DIR> : required output path, where data should be saved. If the directory does not exist, it will be created on runtime. This switch is mutually exclusive with `-s`. At least one of them must be given.
147148

broker/metabroker.py

+34-37
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Copyright (c) 2016 - o2r project
2+
Copyright (c) 2016, 2017 - o2r project
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
@@ -31,23 +31,21 @@ def check(checklist_pathfile, input_json):
3131
# prepare input filepath
3232
try:
3333
if os.path.isfile(input_json):
34-
##print("is file")
3534
with open(input_json, encoding='utf-8') as data_file:
3635
input_data = json.load(data_file)
37-
# open checklist file and find out mode
38-
output_dict = {'required': []}
39-
with open(checklist_pathfile, encoding='utf-8') as data_file:
40-
check_file = json.load(data_file)
41-
settings_data = check_file['Settings'] # json or xml
42-
checklist_data = check_file['Checklist']
43-
#my_mode = settings_data['mode']
44-
# todo:
45-
#check_data_conditions = check_file['Conditions']
46-
for x in checklist_data:
47-
if x not in input_data:
48-
output_dict['required'].append(x)
49-
do_outputs(output_dict, output_dir, settings_data['outputfile'])
50-
36+
# open checklist file and find out mode
37+
output_dict = {'required': []}
38+
with open(checklist_pathfile, encoding='utf-8') as data_file:
39+
check_file = json.load(data_file)
40+
settings_data = check_file['Settings'] # json or xml
41+
checklist_data = check_file['Checklist']
42+
#my_mode = settings_data['mode']
43+
# todo:
44+
#check_data_conditions = check_file['Conditions']
45+
for x in checklist_data:
46+
if x not in input_data:
47+
output_dict['required'].append(x)
48+
do_outputs(output_dict, output_dir, settings_data['outputfile'])
5149
except:
5250
raise
5351

@@ -66,12 +64,12 @@ def do_outputs(output_data, out_mode, out_name):
6664
if not os.path.exists(out_mode):
6765
os.makedirs(out_mode)
6866
with open(output_filename, 'w', encoding='utf-8') as outfile:
69-
# todo: add handling if mode is txt, xml, ...
7067
# for json:
7168
output_data = json.dumps(output_data, sort_keys=True, indent=4, separators=(',', ': '))
7269
outfile.write(str(output_data))
73-
status_note(''.join(
74-
(str(os.stat(output_filename).st_size), ' bytes written to ', os.path.abspath(output_filename))))
70+
# for xml:
71+
# TBD
72+
status_note(''.join((str(os.stat(output_filename).st_size), ' bytes written to ', os.path.abspath(output_filename))))
7573
except Exception as exc:
7674
status_note(''.join(('! error while creating outputs: ', exc.args[0])))
7775

@@ -113,7 +111,6 @@ def map_json(element, value, map_data, output_dict):
113111
for key in value:
114112
# ---<key:string>----------------------------------------------
115113
if type(key) is str:
116-
117114
if key in map_data:
118115
d = 0
119116
# ---<subkey:string>----------------------------------------------
@@ -153,7 +150,7 @@ def map_json(element, value, map_data, output_dict):
153150
output_dict[map_data[y]['needsParent']].append(value[c][y])
154151
# ---<key:dict>----------------------------------------------
155152
elif type(key) is dict:
156-
# as for 'authors'
153+
# e.g. for 'authors'
157154
location = ''
158155
temp = {}
159156
if type(key) is dict:
@@ -173,6 +170,7 @@ def map_json(element, value, map_data, output_dict):
173170

174171

175172
def map_xml(element, value, map_data, xml_root):
173+
seperator = '#'
176174
a = None
177175
try:
178176
if type(value) is list or type(value) is dict:
@@ -248,12 +246,11 @@ def status_note(msg):
248246

249247
# Main
250248
def start(**kwargs):
251-
global input_dir
252-
input_dir = kwargs.get('i', None)
249+
global input_file
250+
input_file = kwargs.get('i', None)
253251
global output_dir
254252
output_dir = kwargs.get('o', None)
255253
output_to_console = kwargs.get('s', None)
256-
seperator = '#' #<-- make this generic
257254
global my_check
258255
my_check = kwargs.get('c', None)
259256
global my_map
@@ -269,7 +266,7 @@ def start(**kwargs):
269266
# not possible currently because output arg group is on mutual exclusive
270267
output_mode = '@none'
271268
if my_check is not None:
272-
check(my_check, input_dir)
269+
check(my_check, input_file)
273270
if my_map is not None:
274271
# open map file and find out mode
275272
try:
@@ -282,18 +279,18 @@ def start(**kwargs):
282279
raise
283280
# distinguish format for output
284281
if my_mode == 'json':
285-
# try parse all possible metadata files:
286-
for file in os.listdir(input_dir):
287-
if os.path.basename(file).startswith('metadata_'):
288-
json_output = {}
289-
with open(os.path.join(input_dir, file), encoding='utf-8') as data_file:
290-
test_data = json.load(data_file)
291-
for element in test_data:
292-
try:
293-
map_json(element, test_data[element], map_data, json_output)
294-
except:
295-
raise
296-
do_outputs(json_output, output_mode, settings_data['outputfile'])
282+
# parse target file # try parse all possible metadata files:
283+
if not os.path.basename(input_file).startswith('metadata_'):
284+
status_note('Warning: inputfile does not look like a metadata file object')
285+
json_output = {}
286+
with open(os.path.join(input_file), encoding='utf-8') as data_file:
287+
test_data = json.load(data_file)
288+
for element in test_data:
289+
try:
290+
map_json(element, test_data[element], map_data, json_output)
291+
except:
292+
raise
293+
do_outputs(json_output, output_mode, settings_data['outputfile'])
297294
elif my_mode == 'txt':
298295
# to do: handle txt based maps like bagit
299296
txt_output = ''

extract/metaextract.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,14 @@ def parse_yaml(input_text):
188188
# This is for R markdown files with yaml headers
189189
try:
190190
yaml_data_dict = yaml.safe_load(input_text)
191-
# get authors and possible ids // orcid
192191
if yaml_data_dict is not None:
192+
# model description / abstract:
193+
if 'description' in yaml_data_dict:
194+
if yaml_data_dict['description'] is not None:
195+
MASTER_MD_DICT['description'] = yaml_data_dict['description']
196+
else:
197+
if 'abstract' in yaml_data_dict:
198+
MASTER_MD_DICT['description'] = yaml_data_dict['abstract']
193199
# model author:
194200
if 'author' in yaml_data_dict:
195201
if type(yaml_data_dict['author']) is str:
@@ -653,7 +659,6 @@ def start(**kwargs):
653659
status_note(''.join(('processing ', os.path.join(root, file).replace('\\', '/'))), b=log_buffer)
654660
# new file / new source
655661
nr += 1
656-
657662
# interact with different file formats:
658663
if file_extension == '.txt':
659664
if file.lower() == 'bagit.txt':

o2rmeta.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def status_note(msg, **kwargs):
6868
maingroup = broker.add_mutually_exclusive_group(required=True)
6969
maingroup.add_argument("-c", "--check", type=str, required=False)
7070
maingroup.add_argument("-m", "--map", type=str, required=False)
71-
broker.add_argument("-i", "--inputdir", type=str, required=True)
71+
broker.add_argument("-i", "--inputfile", type=str, required=True)
7272
group = broker.add_mutually_exclusive_group(required=True)
7373
group.add_argument('-o', '--outputdir', type=str, help='output directory for brokering docs')
7474
group.add_argument('-s', '--outputtostdout', help='output the result of the brokering to stdout',
@@ -84,7 +84,7 @@ def status_note(msg, **kwargs):
8484
# - - - - - - - - - - - - - - - - - -
8585
args = parser.parse_args()
8686
argsd = vars(args)
87-
my_version = 25 # update me!
87+
my_version = 26 # update me!
8888
my_mod = ''
8989
try:
9090
my_mod = datetime.datetime.fromtimestamp(os.stat(__file__).st_mtime)
@@ -98,7 +98,7 @@ def status_note(msg, **kwargs):
9898
metaextract.start(i=argsd['inputdir'], o=argsd['outputdir'], s=argsd['outputtostdout'], xo=argsd['stayoffline'], e=argsd['ercid'], m=argsd['metafiles'], xml=argsd['modexml'])
9999
elif argsd['tool'] == "broker":
100100
status_note('launching broker')
101-
metabroker.start(c=argsd['check'], m=argsd['map'], i=argsd['inputdir'], o=argsd['outputdir'], s=argsd['outputtostdout'])
101+
metabroker.start(c=argsd['check'], m=argsd['map'], i=argsd['inputfile'], o=argsd['outputdir'], s=argsd['outputtostdout'])
102102
elif argsd['tool'] == "validate":
103103
status_note('launching validator')
104104
metavalidate.start(s=argsd['schema'], c=argsd['candidate'])

schema/json/dummy.json

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"access_right": "open",
23
"author": [],
34
"communities": [
45
{
@@ -36,10 +37,11 @@
3637
}
3738
},
3839
"keywords": [],
39-
"license": null,
40+
"license": "cc-by",
4041
"paperLanguage": [],
4142
"paperSource": null,
4243
"publicationDate": null,
44+
"publication_type": "other",
4345
"r_comment": [],
4446
"r_input": [],
4547
"r_output": [],
@@ -56,5 +58,6 @@
5658
"end": null
5759
},
5860
"title": null,
61+
"upload_type": "publication",
5962
"version": null
6063
}

0 commit comments

Comments
 (0)