1
- '''
2
- Copyright (c) 2016 - o2r project
3
-
4
- Licensed under the Apache License, Version 2.0 (the "License");
5
- you may not use this file except in compliance with the License.
6
- You may obtain a copy of the License at
7
-
8
- http://www.apache.org/licenses/LICENSE-2.0
9
-
10
- Unless required by applicable law or agreed to in writing, software
11
- distributed under the License is distributed on an "AS IS" BASIS,
12
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- See the License for the specific language governing permissions and
14
- limitations under the License.
15
-
16
- '''
17
-
18
- import argparse
19
1
import base64
20
2
import json
21
- import sys
22
3
import urllib .request
23
4
24
5
from lxml import etree
25
6
26
7
27
8
def parse_from_unicode(unicode_str):
    """Parse XML held in a text string and return the lxml root element.

    The text is encoded to UTF-8 bytes before parsing, and the parser is
    pinned to UTF-8 as well.

    NOTE(review): presumably this sidesteps lxml's refusal to parse ``str``
    input that carries an XML encoding declaration -- confirm against the
    lxml documentation.

    :param unicode_str: XML document as ``str``
    :return: root element as produced by ``etree.fromstring``
    """
    encoded = unicode_str.encode('utf-8')
    return etree.fromstring(encoded, parser=etree.XMLParser(encoding='utf-8'))
30
12
31
13
def qu (q_type ,q_string ,q_base ):
32
14
# check if json url has been parsed correctly
33
15
if q_base .startswith ('\\ url{' ):
34
16
q_base = q_base [5 :].replace ('}' , '' )
35
- accepted = ['doi' ,'creator' ] #<-- generify
17
+ accepted = ['doi' , 'creator' ]
36
18
if q_type in accepted :
37
19
# datacite solr query encoding ideolect:
38
20
my_query = str .encode ('q=' + q_type + '%3A' + q_string )
@@ -47,43 +29,39 @@ def qu(q_type,q_string,q_base):
47
29
print ('query type not available' )
48
30
return None
49
31
32
+
33
def status_note(msg):
    """Print *msg* to stdout behind the ``[o2rmeta][harvest] `` tag.

    :param msg: any object; it is converted with ``str()`` before printing
    """
    tagged = '[o2rmeta][harvest] ' + str(msg)
    print(tagged)
35
+
36
+
50
37
# main:
51
- if __name__ == "__main__" :
52
- if sys .version_info [0 ] < 3 :
53
- # py2
54
- print ('requires py3k or later' )
55
- sys .exit ()
56
- else :
57
- parser = argparse .ArgumentParser (description = 'description' )
58
- parser .add_argument ('-i' , '--input' , help = 'type of provided metadata element for input, e.g. doi or creator' , required = True )
59
- parser .add_argument ('-q' , '--query' , help = 'query string' , required = True )
60
- args = parser .parse_args ()
61
- argsdict = vars (args )
62
- i = argsdict ['input' ]
63
- q = argsdict ['query' ]
64
- try :
65
- with open ('bases.json' , encoding = 'utf-8' ) as data_file :
66
- bases = json .load (data_file )
67
- settings_data = bases ['Settings' ]
68
- baseurl_data = bases ['BaseURLs' ]
69
- except :
70
- raise
71
- print ('[metaharvest] starting request' )
72
- #test datacite
73
- my_base = 'DataCite' #<-- make this parsed arg
74
- try :
75
- result = qu (i .lower (), q , baseurl_data [my_base ]['url' ] + baseurl_data [my_base ]['default_parameter' ])
76
- print ('[metaharvest] !debug using ' + result [:128 ] + ' ...' )
77
- utf8_parser = etree .XMLParser (encoding = 'utf-8' )
78
- tree = parse_from_unicode (result )
79
- # e.g. return author from datacite creatorName
80
- output = {}
81
- for node in tree .xpath ('//ns:creatorName' , namespaces = {'ns' : 'http://datacite.org/schema/kernel-3' }):
82
- output ['authorName' ] = node .text
83
- json_output = json .dumps (output )
84
- #output if not empty
85
- if str (json_output ) != '{}' :
86
- print (str (json_output ))
87
- except :
88
- raise
89
- #pass
38
def start(**kwargs):
    """Run one harvest request against the DataCite endpoint.

    Reads the endpoint configuration from ``harvest/bases.json`` (path is
    relative to the current working directory), sends the query through
    ``qu``, parses the XML answer and prints a JSON object of the form
    ``{"authorName": ...}`` if a ``creatorName`` element was found.

    Keyword arguments:
        e: type of the metadata element to search by; it is lower-cased and
           handed to ``qu`` as the query type.
        q: the query string.

    Side effects:
        Stores ``e`` and ``q`` in the module-level globals of the same name
        (kept for backward compatibility) and writes to stdout.

    Returns:
        None. If ``e`` or ``q`` is missing, or ``qu`` yields no result, a
        status note is printed and the function returns early instead of
        failing with an ``AttributeError``/``TypeError``.

    Raises:
        Any error from reading or decoding ``harvest/bases.json``, from
        missing configuration keys, or from XML parsing propagates unchanged.
    """
    global e
    global q
    e = kwargs.get('e', None)
    q = kwargs.get('q', None)
    if e is None or q is None:
        # without both values neither e.lower() nor the query string
        # concatenation inside qu() can succeed
        status_note('missing element type (e) or query string (q), nothing to harvest')
        return None
    with open('harvest/bases.json', encoding='utf-8') as data_file:
        bases = json.load(data_file)
    baseurl_data = bases['BaseURLs']
    status_note('new request')
    # demo datacite
    my_base = 'DataCite'
    # todo: argparse base endpoint
    endpoint = baseurl_data[my_base]['url'] + baseurl_data[my_base]['default_parameter']
    result = qu(e.lower(), q, endpoint)
    if result is None:
        # qu() returns None when it does not support the query type
        status_note('no result for this request')
        return None
    status_note('!debug using ' + result[:256] + ' ...')
    tree = parse_from_unicode(result)
    # e.g. return author from datacite creatorName
    output = {}
    # every match overwrites the same key, so the last creatorName wins
    for node in tree.xpath('//ns:creatorName', namespaces={'ns': 'http://datacite.org/schema/kernel-3'}):
        output['authorName'] = node.text
    # output if not empty
    if output:
        print(json.dumps(output))
    return None
0 commit comments