From 5645f6d7df6757652b838adca03e921b0cb88ca5 Mon Sep 17 00:00:00 2001 From: rmfranken Date: Wed, 20 Nov 2024 11:28:44 +0100 Subject: [PATCH] feat: check if orcid is valid before writing --- gimie/parsers/cff.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/gimie/parsers/cff.py b/gimie/parsers/cff.py index e0d1202..b6aa2e2 100644 --- a/gimie/parsers/cff.py +++ b/gimie/parsers/cff.py @@ -51,13 +51,16 @@ def parse(self, data: bytes) -> Graph: if not authors: return extracted_cff_triples for author in authors: - if author["orcid"]: + orcid = URIRef(author["orcid"]) + if re.match( + r"https:\/\/orcid.org\/\d{4}-\d{4}-\d{4}-\d{4}", str(orcid) + ): extracted_cff_triples.add( - (self.subject, SDO.author, URIRef(author["orcid"])) + (self.subject, SDO.author, URIRef(orcid)) ) extracted_cff_triples.add( ( - URIRef(author["orcid"]), + URIRef(orcid), SDO.name, Literal( author["given-names"] @@ -68,21 +71,19 @@ def parse(self, data: bytes) -> Graph: ) extracted_cff_triples.add( ( - URIRef(author["orcid"]), + orcid, MD4I.orcidId, - Literal(author["orcid"]), + Literal(orcid), ) ) extracted_cff_triples.add( ( - URIRef(author["orcid"]), + orcid, SDO.affiliation, Literal(author["affiliation"]), ) ) - extracted_cff_triples.add( - (URIRef(author["orcid"]), RDF.type, SDO.Person) - ) + extracted_cff_triples.add((orcid, RDF.type, SDO.Person)) return extracted_cff_triples