-
Notifications
You must be signed in to change notification settings - Fork 12
/
schema_transformations_utils.py
66 lines (46 loc) · 2.03 KB
/
schema_transformations_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""JSON schema transformation utility functions."""
__copyright__ = 'Copyright (c) 2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'
import re
def correctify_orcid(org_orcid: str) -> str | None:
"""Correct illformatted ORCID."""
# Get rid of all spaces.
orcid = org_orcid.replace(' ', '')
# Upper-case X.
orcid = orcid.replace('x', 'X')
# The last part should hold a valid id like eg: 1234-1234-1234-123X.
# If not, it is impossible to correct it to the valid orcid format
orcs = orcid.split('/')
if not re.search("^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", orcs[-1]):
return None
return "https://orcid.org/{}".format(orcs[-1])
def correctify_scopus(org_scopus: str) -> str | None:
"""Correct illformatted Scopus."""
# Get rid of all spaces.
new_scopus = org_scopus.replace(' ', '')
if not re.search(r"^\d{1,11}$", new_scopus):
return None
return new_scopus
def correctify_isni(org_isni: str) -> str | None:
"""Correct ill-formatted ISNI."""
# Remove all spaces.
new_isni = org_isni.replace(' ', '')
# Upper-case X.
new_isni = new_isni.replace('x', 'X')
# The last part should hold a valid id like eg: 123412341234123X.
# If not, it is impossible to correct it to the valid isni format
new_isni_split = new_isni.split('/')
if not re.search("^[0-9]{15}[0-9X]$", new_isni_split[-1]):
return None
return "https://isni.org/isni/{}".format(new_isni_split[-1])
def correctify_researcher_id(org_researcher_id: str) -> str:
    """Correct an ill-formatted ResearcherID.

    All spaces are removed and the part after the last '/' is taken as the
    identifier, so both a bare identifier and a value with a URL-like prefix
    are accepted. Unlike a None result, a value that cannot be corrected is
    handed back exactly as it was received.

    :param org_researcher_id: Original ResearcherID value, with or without a
                              prefix ending in '/'

    :returns: "https://www.researcherid.com/rid/<id>" when the identifier has
              the form A-1234-1234, otherwise the original, unmodified value
    """
    # Get rid of all spaces.
    researcher_id = org_researcher_id.replace(' ', '')

    # The last part should hold a valid id like eg: A-1234-1234
    identifier = researcher_id.split('/')[-1]

    # fullmatch instead of search with '^...$': '$' also matches right before
    # a trailing newline, which would let "<id>\n" through and put the newline
    # in the returned URL.
    if re.fullmatch(r"[A-Z]-[0-9]{4}-[0-9]{4}", identifier) is None:
        # Impossible to correct it to the valid ResearcherID format:
        # return original value.
        return org_researcher_id

    return f"https://www.researcherid.com/rid/{identifier}"