@@ -62,20 +62,23 @@ def get_doi_http(md_title, md_author):
62
62
status_note ('requesting doi via crossref.org ...' )
63
63
my_params = {'query.title' : md_title , 'query.author' : md_author }
64
64
r = requests .get ('https://api.crossref.org/works' , params = my_params , timeout = 20 )
65
- #status_note('debug: <get_doi_http> GET ' + r.url)
66
65
status_note (' ' .join ((str (r .status_code ), r .reason )))
67
- if 'message' in r .json ():
68
- if 'items' in r .json ()['message' ]:
69
- if type (r .json ()['message' ]['items' ]) is list :
70
- # take first hit, best match
71
- if 'DOI' in r .json ()['message' ]['items' ][0 ]:
72
- return r .json ()['message' ]['items' ][0 ]['DOI' ]
66
+ if r is not None :
67
+ status_note ('debug: <get_doi_http> GET ' + r .url )
68
+ if 'message' in r .json ():
69
+ if 'items' in r .json ()['message' ]:
70
+ if type (r .json ()['message' ]['items' ]) is list :
71
+ # take first hit, best match
72
+ if 'DOI' in r .json ()['message' ]['items' ][0 ]:
73
+ return r .json ()['message' ]['items' ][0 ]['DOI' ]
73
74
except requests .exceptions .Timeout :
74
75
status_note ('http doi request: timeout' )
75
76
except requests .exceptions .TooManyRedirects :
76
77
status_note ('http doi request: too many redirects' )
77
78
except requests .exceptions .RequestException as e :
78
79
status_note ('http doi request: ' + str (e ))
80
+ except :
81
+ status_note ('! error while requesting doi' )
79
82
80
83
81
84
def get_orcid_http (txt_input , bln_sandbox ):
@@ -150,6 +153,11 @@ def get_r_package_class(package):
150
153
#raise
151
154
status_note ('' .join (('! error while classifying r package:' , str (exc .problem_mark ), str (exc .problem ))))
152
155
156
def get_rel_path(input_path):
    """Return input_path relative to the base directory, using '/' separators.

    This is the path used for output and display. The base directory is the
    module-level ``basedir`` (taken from the --basedir flag); when it is None
    or empty, the current working directory is used instead.

    Args:
        input_path: path of a file or directory, absolute or relative.

    Returns:
        The relative path as a string in which every backslash has been
        replaced by a forward slash.

    Raises:
        ValueError: if input_path is empty (raised by os.path.relpath).
    """
    # fall back explicitly instead of handing None to os.path.relpath
    start_dir = basedir if basedir else os.curdir
    relative_path = os.path.relpath(input_path, start_dir)
    # replace every backslash, not only a backslash followed by a space,
    # so that Windows separators are normalised for display
    return relative_path.replace('\\', '/')
160
+
153
161
154
162
def parse_bagitfile (file_path ):
155
163
txt_dict = {'bagittxt_file' : file_path }
@@ -213,7 +221,7 @@ def parse_spatial(file_id, filepath, fformat):
213
221
if 'files' not in CANDIDATES_MD_DICT [file_id ]['spatial' ]:
214
222
key_files = {'files' : []}
215
223
CANDIDATES_MD_DICT [file_id ]['spatial' ] = key_files
216
- new_file_key ['source_file' ] = filepath
224
+ new_file_key ['source_file' ] = get_rel_path ( filepath )
217
225
new_file_key ['geojson' ] = {}
218
226
if coords is not None :
219
227
new_file_key ['geojson' ]['bbox' ] = coords .bounds
@@ -423,7 +431,7 @@ def extract_from_candidate(file_id, path_file, out_format, out_mode, multiline,
423
431
if s :
424
432
md_filepath = s .group (1 )
425
433
else :
426
- md_filepath = path_file
434
+ md_filepath = get_rel_path ( path_file )
427
435
md_record_date = datetime .datetime .today ().strftime ('%Y-%m-%d' )
428
436
data_dict = {'file' : {'filename' : md_file , 'filepath' : md_filepath , 'mimetype' : md_mime_type },
429
437
'ercIdentifier' : md_erc_id ,
@@ -569,6 +577,8 @@ def start(**kwargs):
569
577
input_dir = kwargs .get ('i' , None )
570
578
global md_erc_id
571
579
md_erc_id = kwargs .get ('e' , None )
580
+ global basedir
581
+ basedir = kwargs .get ('b' , None )
572
582
global stay_offline
573
583
stay_offline = kwargs .get ('xo' , None )
574
584
global metafiles_all
@@ -693,7 +703,7 @@ def start(**kwargs):
693
703
# give it a number
694
704
new_id = str (uuid .uuid4 ())
695
705
if os .path .isfile (full_file_path ) and full_file_path not in file_list_input_candidates :
696
- file_list_input_candidates .append (full_file_path )
706
+ file_list_input_candidates .append (get_rel_path ( full_file_path ) )
697
707
if nr < 50 :
698
708
# use buffering to prevent performance issues when parsing very large numbers of files
699
709
log_buffer = False
@@ -718,12 +728,12 @@ def start(**kwargs):
718
728
CANDIDATES_MD_DICT [new_id ][bagit_txt_file ] = (parse_bagitfile (full_file_path ))
719
729
elif file_extension == '.r' :
720
730
extract_from_candidate (new_id , full_file_path , output_format , output_mode , False , rule_set_r )
721
- MASTER_MD_DICT ['codefiles' ].append (full_file_path )
731
+ MASTER_MD_DICT ['codefiles' ].append (get_rel_path ( full_file_path ) )
722
732
elif file_extension == '.rmd' :
723
733
extract_from_candidate (new_id , full_file_path , output_format , output_mode , True , rule_set_rmd_multiline )
724
734
parse_temporal (new_id , full_file_path , None , None )
725
735
elif file_extension == '.html' :
726
- MASTER_MD_DICT ['viewfile' ].append (full_file_path )
736
+ MASTER_MD_DICT ['viewfile' ].append (get_rel_path ( full_file_path ) )
727
737
else :
728
738
parse_spatial (new_id , full_file_path , file_extension )
729
739
status_note ('' .join ((str (nr ), ' files processed' )))
@@ -788,4 +798,4 @@ def start(**kwargs):
788
798
output_extraction (MASTER_MD_DICT , output_format , output_mode , os .path .join (output_dir , main_metadata_filename ))
789
799
get_ercspec_http (output_dir )
790
800
# Write erc.yml according to ERC spec:
791
- ercyml_write (output_dir )
801
+ # ercyml_write(output_dir)
0 commit comments