Skip to content

Commit

Permalink
feat: support multiple DOIs
Browse files Browse the repository at this point in the history
  • Loading branch information
rmfranken committed Dec 17, 2024
1 parent 70bcac9 commit a884cbc
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 34 deletions.
66 changes: 34 additions & 32 deletions gimie/parsers/cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,22 @@ def __init__(self, subject: str):
super().__init__(subject)

def parse(self, data: bytes) -> Graph:
"""Extracts a DOI link and list of authors from a CFF file and returns a
graph with a single triple <subject> <schema:citation> <doi>
"""Extracts DOIs and a list of authors from a CFF file and returns a
graph with triples <subject> <schema:citation> <doi>
and a number of author objects with <schema:name> and <md4i:orcid> values.
If no DOI is found, it will not be included in the graph.
If no authors are found, it will not be included in the graph.
If neither authors nor DOI are found, an empty graph is returned.
If no DOIs are found, they will not be included in the graph.
If no authors are found, they will not be included in the graph.
If neither authors nor DOIs are found, an empty graph is returned.
"""
extracted_cff_triples = Graph()
doi = get_cff_doi(data)
dois = get_cff_doi(data)
authors = get_cff_authors(data)

if doi:
extracted_cff_triples.add(
(self.subject, SDO.citation, URIRef(doi))
)
if dois:
for doi in dois:
extracted_cff_triples.add(
(self.subject, SDO.citation, URIRef(doi))
)
if not authors:
return extracted_cff_triples
for author in authors:
Expand Down Expand Up @@ -119,8 +120,8 @@ def doi_to_url(doi: str) -> str:
return f"https://doi.org/{doi_match}"


def get_cff_doi(data: bytes) -> Optional[str]:
"""Given a CFF file, returns the DOI, if any.
def get_cff_doi(data: bytes) -> Optional[List[str]]:
"""Given a CFF file, returns a list of DOIs, if any.
Parameters
----------
Expand All @@ -129,13 +130,15 @@ def get_cff_doi(data: bytes) -> Optional[str]:
Returns
-------
str, optional
doi formatted as a valid url
list of str, optional
DOIs formatted as valid URLs
Examples
--------
>>> get_cff_doi(bytes("identifiers:\\n - type: doi\\n value: 10.5281/zenodo.1234", encoding="utf8"))
'https://doi.org/10.5281/zenodo.1234'
>>> get_cff_doi(bytes("identifiers:\\n - type: doi\\n value: 10.5281/zenodo.1234\\n - type: doi\\n value: 10.5281/zenodo.5678", encoding="utf8"))
['https://doi.org/10.5281/zenodo.1234', 'https://doi.org/10.5281/zenodo.5678']
>>> get_cff_doi(bytes("identifiers:\\n - type: doi\\n value: 10.5281/zenodo.9012", encoding="utf8"))
['https://doi.org/10.5281/zenodo.9012']
>>> get_cff_doi(bytes("abc: def", encoding="utf8"))
"""

Expand All @@ -144,25 +147,24 @@ def get_cff_doi(data: bytes) -> Optional[str]:
except yaml.scanner.ScannerError:
logger.warning("cannot read CITATION.cff, skipped.")
return None

doi_urls = []
try:
identifiers = cff.get("identifiers", [])
doi_identifier = next(
(id for id in identifiers if id.get("type") == "doi"), None
)
if doi_identifier:
doi_url = doi_to_url(doi_identifier["value"])
else:
raise KeyError("No DOI found in identifiers")
# No doi in cff file
for identifier in identifiers:
if identifier.get("type") == "doi":
try:
doi_url = doi_to_url(identifier["value"])
doi_urls.append(doi_url)
except ValueError as err:
logger.warning(err)
except (KeyError, TypeError):
logger.warning("CITATION.cff does not contain a 'doi' key.")
doi_url = None
# doi is malformed
except ValueError as err:
logger.warning(err)
doi_url = None

return doi_url
logger.warning(
"CITATION.cff does not contain a valid 'identifiers' key."
)
return None

return doi_urls if doi_urls else None


def get_cff_authors(data: bytes) -> Optional[List[dict[str, str]]]:
Expand Down
12 changes: 10 additions & 2 deletions tests/test_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,18 @@ def test_parse_doi():
identifiers:
- type: doi
value: 10.5281/zenodo.3555620
- type: doi
value: 10.21105/joss.01274
"""
obj = next(
parsed_dois = list(
CffParser(subject=URIRef("https://example.org/"))
.parse(data=cff_file)
.objects()
)
assert URIRef("https://doi.org/10.5281/zenodo.3555620") == obj
expected_dois = [
URIRef("https://doi.org/10.5281/zenodo.3555620"),
URIRef("https://doi.org/10.21105/joss.01274"),
]
# parsed_dois already contains all parsed DOI objects
for doi in expected_dois:
assert doi in parsed_dois

0 comments on commit a884cbc

Please sign in to comment.