|
23 | 23 | import sys
|
24 | 24 | import urllib.request
|
25 | 25 | import uuid
|
| 26 | +from subprocess import Popen, PIPE, STDOUT |
26 | 27 | from xml.dom import minidom
|
27 | 28 |
|
28 | 29 | import dicttoxml
|
@@ -154,12 +155,55 @@ def get_r_package_class(package):
|
154 | 155 | #raise
|
155 | 156 | status_note(''.join(('! error while classifying r package:', str(exc.problem_mark), str(exc.problem))))
|
156 | 157 |
|
| 158 | + |
def get_rel_path(input_path):
    """Return ``input_path`` relative to ``basedir``, written with forward slashes.

    The result is the path used for output and display; ``basedir`` is the
    module-level value that belongs to the --basedir flag.
    """
    relative = os.path.relpath(os.path.join(input_path), basedir)
    # backslash separators are rewritten so the displayed path always uses '/'
    return relative.replace('\\', '/')
|
161 | 163 |
|
162 | 164 |
|
def get_rdata(filepath):
    """Return a text preview of the objects stored in an RData file.

    The R executable is located through the ``R_HOME`` environment variable,
    started with ``--vanilla`` and the absolute file path as arguments, and
    fed ``ls.str()`` on stdin. Whatever R prints after echoing that command
    is returned.

    :param filepath: path to the RData file
    :return: at most 40000 characters of the ``ls.str()`` output, or ``None``
        when the file is larger than 200 MiB, ``R_HOME`` is unset, no R
        executable can be found, or the R output does not contain the echoed
        command
    :raises OSError: if the file cannot be stat'ed or R cannot be started
    """
    # skip large files, unsuitable for text preview
    if os.stat(filepath).st_size / 1024 ** 2 > 200:
        status_note('[debug] skipping large RData file...')
        return None

    rhome_name = 'R_HOME'
    rhome = os.environ.get(rhome_name)
    if rhome is None:
        status_note(''.join(('[debug] ', rhome_name, ' env is not set...')))
        return None

    rpath = rhome.replace("\\", "/")
    # R_HOME may name the executable itself or a directory that contains it.
    # A directory is never handed to Popen, even if its name ends in 'R'.
    if os.path.isdir(rpath) or not rpath.endswith(('R', 'R.exe')):
        candidates = (
            os.path.join(rpath, 'R.exe'),
            os.path.join(rpath, 'R'),
            os.path.join(rpath, 'bin', 'R.exe'),
            os.path.join(rpath, 'bin', 'R'),
        )
        rpath = next((c for c in candidates if os.path.isfile(c)), None)
        if rpath is None:
            # Cannot take path
            status_note('[debug] invalid path to R executable')
            return None
    elif not os.path.exists(rpath):
        # Cannot take path
        status_note('[debug] invalid path to R installation')
        return None

    status_note('processing RData')
    p = Popen([rpath, '--vanilla', os.path.abspath(filepath)], stdout=PIPE, stdin=PIPE, stderr=STDOUT)
    raw = p.communicate(input=b'ls.str()')[0].decode('ISO-8859-1')
    # drop the last four characters of the session output, then keep only
    # what follows the echoed command
    parts = raw[:-4].split("> ls.str()")
    if len(parts) < 2:
        # R did not echo the command (e.g. it failed during startup)
        status_note('[debug] unexpected output from R, no RData preview')
        return None
    return parts[1][:40000]
| 205 | + |
| 206 | + |
163 | 207 | def parse_bagitfile(file_path):
|
164 | 208 | txt_dict = {'bagittxt_file': file_path}
|
165 | 209 | with open(file_path) as f:
|
@@ -199,23 +243,6 @@ def parse_r(input_text, parser_dict):
|
199 | 243 | #status_note(''.join(('! error while parsing R input: ', str(exc.args[0]))))
|
200 | 244 |
|
201 | 245 |
|
202 |
def parse_rdata(filepath):
    """Record the names of the R objects stored in an RData file.

    The file is loaded through rpy2 and an entry
    ``{'file': <relative path>, 'rdata': [<object names>]}`` is appended to
    ``MASTER_MD_DICT['r_rdata']`` when that key exists. This is best effort:
    any ordinary error (including a missing rpy2 installation) is reported
    through ``status_note`` and otherwise ignored.

    :param filepath: path to the RData file
    :return: None
    """
    try:
        # set test user:
        os.environ['R_USER'] = 'test'
        import rpy2.robjects as robjects
        # walk r objects stored in binary rdata file:
        my_robjs = [str(key) for key in robjects.r['load'](filepath)]
        md_rdata = {'file': get_rel_path(filepath), 'rdata': my_robjs}
        if 'r_rdata' in MASTER_MD_DICT:
            MASTER_MD_DICT['r_rdata'].append(md_rdata)
    except Exception:
        # only ordinary errors are swallowed; KeyboardInterrupt and SystemExit
        # still propagate
        status_note('debug: <parse_rdata> errored')
217 |
| - |
218 |
| - |
219 | 246 | def parse_spatial(filepath, fformat):
|
220 | 247 | try:
|
221 | 248 | # <side_key> is an dict key in candidates to store all spatial files as list, other than finding the best candidate of spatial file
|
@@ -747,15 +774,17 @@ def start(**kwargs):
|
747 | 774 | if file_extension == '.txt':
|
748 | 775 | if file.lower() == 'bagit.txt':
|
749 | 776 | CANDIDATES_MD_DICT[new_id] = {}
|
750 |
| - CANDIDATES_MD_DICT[new_id][bagit_txt_file] = (parse_bagitfile(full_file_path)) |
| 777 | + CANDIDATES_MD_DICT[new_id][bagit_txt_file] = parse_bagitfile(full_file_path) |
751 | 778 | elif file_extension == '.r':
|
752 | 779 | extract_from_candidate(new_id, full_file_path, output_format, output_mode, False, rule_set_r)
|
753 | 780 | MASTER_MD_DICT['codefiles'].append(get_rel_path(full_file_path))
|
754 | 781 | elif file_extension == '.rmd':
|
755 | 782 | extract_from_candidate(new_id, full_file_path, output_format, output_mode, True, rule_set_rmd_multiline)
|
756 | 783 | parse_temporal(new_id, full_file_path, None, None)
|
757 |
| - #elif file_extension == '.rdata': |
758 |
| - # parse_rdata(full_file_path) |
| 784 | + elif file_extension == '.rdata': |
| 785 | + MASTER_MD_DICT['r_rdata'].append({'file': file, |
| 786 | + 'filepath': get_rel_path(full_file_path), |
| 787 | + 'rdata_preview': get_rdata(full_file_path)}) |
759 | 788 | elif file_extension == '.html':
|
760 | 789 | MASTER_MD_DICT['viewfile'].append(get_rel_path(full_file_path))
|
761 | 790 | else:
|
|
0 commit comments