Skip to content

Commit cbeeb9b

Browse files
author
7048730
committed
upd extract
- add r_rdata md, including file and objects extracted from binary rdata - require rpy2 package
1 parent 96aec2f commit cbeeb9b

File tree

3 files changed

+27
-5
lines changed

3 files changed

+27
-5
lines changed

extract/metaextract.py

+24-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Copyright (c) 2016 - o2r project
2+
Copyright (c) 2016, 2017 - o2r project
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
@@ -34,6 +34,7 @@
3434

3535

3636
def get_ercspec_http(spec_output_dir):
37+
# use this function to configure a specification file that needs to be included
3738
if stay_offline:
3839
status_note('skipping erc spec download (http disabled)')
3940
return None
@@ -64,7 +65,7 @@ def get_doi_http(md_title, md_author):
6465
r = requests.get('https://api.crossref.org/works', params=my_params, timeout=20)
6566
status_note(' '.join((str(r.status_code), r.reason)))
6667
if r is not None:
67-
status_note('debug: <get_doi_http> GET ' + r.url)
68+
status_note(''.join(('debug: <get_doi_http> GET')))
6869
if 'message' in r.json():
6970
if 'items' in r.json()['message']:
7071
if type(r.json()['message']['items']) is list:
@@ -76,7 +77,7 @@ def get_doi_http(md_title, md_author):
7677
except requests.exceptions.TooManyRedirects:
7778
status_note('http doi request: too many redirects')
7879
except requests.exceptions.RequestException as e:
79-
status_note('http doi request: ' + str(e))
80+
status_note(''.join(('http doi request: ', str(e))))
8081
except:
8182
status_note('! error while requesting doi')
8283

@@ -107,7 +108,7 @@ def get_orcid_http(txt_input, bln_sandbox):
107108
except requests.exceptions.TooManyRedirects:
108109
status_note('http orcid request: too many redirects')
109110
except requests.exceptions.RequestException as e:
110-
status_note('http orcid request: ' + str(e))
111+
status_note(''.join(('http orcid request: ', str(e))))
111112

112113

113114
def get_r_package_class(package):
@@ -198,6 +199,22 @@ def parse_r(input_text, parser_dict):
198199
#status_note(''.join(('! error while parsing R input: ', str(exc.args[0]))))
199200

200201

202+
def parse_rdata(filepath):
    """Record the names of the R objects stored in a binary rdata file.

    Calls R's ``load`` on ``filepath`` through rpy2, collects the string form
    of every item ``load`` returns, and, if ``MASTER_MD_DICT`` has an
    ``'r_rdata'`` key, appends ``{'file': ..., 'rdata': [...]}`` to that list.
    The ``'file'`` value is whatever ``get_rel_path(filepath)`` returns.

    Nothing is returned. Exceptions (missing rpy2, unreadable file, ...) are
    not handled here and propagate to the caller.
    """
    # set test user (kept ahead of the rpy2 import, as in the original order):
    os.environ['R_USER'] = 'test'
    import rpy2.robjects as robjects
    # walk r objects stored in binary rdata file:
    object_names = [str(name) for name in robjects.r['load'](filepath)]
    rdata_entry = {'file': get_rel_path(filepath), 'rdata': object_names}
    if 'r_rdata' in MASTER_MD_DICT:
        MASTER_MD_DICT['r_rdata'].append(rdata_entry)
216+
217+
201218
def parse_spatial(filepath, fformat):
202219
try:
203220
# <side_key> is a dict key in candidates under which all spatial files are stored as a list, rather than selecting only the best candidate spatial file
@@ -671,6 +688,7 @@ def start(**kwargs):
671688
'r_comment': [],
672689
'r_input': [],
673690
'r_output': [],
691+
'r_rdata': [],
674692
'recordDateCreated': None,
675693
'researchQuestions': [],
676694
'researchHypotheses': [],
@@ -735,6 +753,8 @@ def start(**kwargs):
735753
elif file_extension == '.rmd':
736754
extract_from_candidate(new_id, full_file_path, output_format, output_mode, True, rule_set_rmd_multiline)
737755
parse_temporal(new_id, full_file_path, None, None)
756+
elif file_extension == '.rdata':
757+
parse_rdata(full_file_path)
738758
elif file_extension == '.html':
739759
MASTER_MD_DICT['viewfile'].append(get_rel_path(full_file_path))
740760
else:

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ guess_language-spirit
55
jsonschema
66
lxml
77
python-dateutil
8-
requests
8+
requests
9+
rpy2

schema/json/dummy.json

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
"r_comment": [],
5353
"r_input": [],
5454
"r_output": [],
55+
"r_rdata": [],
5556
"recordDateCreated": null,
5657
"researchHypotheses": [],
5758
"researchQuestions": [],

0 commit comments

Comments
 (0)