Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rules: add beams as custom field #192

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 37 additions & 16 deletions cds_migrator_kit/migration_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,31 +381,52 @@ def _(x): # needed to avoid start time failure with lazy strings
KeywordCF(name="cern:projects", multiple=True),
KeywordCF(name="cern:facilities", multiple=True),
KeywordCF(name="cern:studies", multiple=True),
KeywordCF(name="cern:beams", multiple=True),
]

# Absolute directory that contains this configuration module (symlinks resolved).
base_path = os.path.split(os.path.realpath(__file__))[0]

# Log directory located under the module directory; the trailing slash is part
# of the configured value.
CDS_MIGRATOR_KIT_LOGS_PATH = logs_dir = os.path.join(base_path, "tmp/logs/")

# Location of the streams definition file. The path is relative, so it is
# presumably resolved against the working directory — TODO confirm with the loader.
CDS_MIGRATOR_KIT_STREAM_CONFIG = "cds_migrator_kit/rdm/migration/streams.yaml"

from invenio_rdm_records.config import RDM_RECORDS_IDENTIFIERS_SCHEMES, always_valid, RDM_RECORDS_PERSONORG_SCHEMES
from invenio_rdm_records.config import (
RDM_RECORDS_IDENTIFIERS_SCHEMES,
always_valid,
RDM_RECORDS_PERSONORG_SCHEMES,
)
from cds_rdm import schemes

# Identifier schemes accepted on records: the defaults imported from
# invenio_rdm_records.config, extended with CDS-specific schemes.
# Keys written after the unpacked defaults take precedence on collision.
RDM_RECORDS_IDENTIFIERS_SCHEMES = {
    **RDM_RECORDS_IDENTIFIERS_SCHEMES,
    "cds_ref": {
        "label": _("CDS Reference"),
        "validator": always_valid,
        "datacite": "CDS",
    },
    "aleph": {
        "label": _("Aleph number"),
        "validator": schemes.is_aleph,
        "datacite": "ALEPH",
    },
    "inspire": {
        "label": _("Inspire"),
        "validator": schemes.is_inspire,
        "datacite": "INSPIRE",
    },
}

# Identifier schemes accepted on persons/organisations: the imported defaults
# plus the INSPIRE scheme.
RDM_RECORDS_PERSONORG_SCHEMES = {
    **RDM_RECORDS_PERSONORG_SCHEMES,
    "inspire": {
        "label": _("Inspire"),
        "validator": schemes.is_inspire,
        "datacite": "INSPIRE",
    },
}


CDS_MIGRATOR_KIT_RECORD_STATS_STREAM_CONFIG = dict(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,14 @@ def subjects(self, key, value):
def custom_fields(self, key, value):
"""Translates custom fields."""
_custom_fields = self.get("custom_fields", {})
experiments, accelerators, projects, facilities, studies = [], [], [], [], []
experiments, accelerators, projects, facilities, studies, beams = (
[],
[],
[],
[],
[],
[],
)
if key == "693__":
if "e" in value and value.get("e"):
experiments += [StringValue(v).parse() for v in force_list(value.get("e"))]
Expand All @@ -225,20 +232,14 @@ def custom_fields(self, key, value):
if "s" in value and value.get("s"):
studies += [StringValue(v).parse() for v in force_list(value.get("s"))]
if "b" in value and value.get("b"):
# migrates beams field to subjects/keywords
_subjects = self.get("subjects", [])
subject_value = StringValue(value.get("a")).parse()
subject = {
"subject": subject_value,
}
_subjects.append(subject)
raise IgnoreKey("custom_fields")
beams += [StringValue(v).parse() for v in force_list(value.get("b"))]

_custom_fields["cern:experiments"] = experiments
_custom_fields["cern:accelerators"] = accelerators
_custom_fields["cern:projects"] = projects
_custom_fields["cern:facilities"] = facilities
_custom_fields["cern:studies"] = studies
_custom_fields["cern:beams"] = beams
return _custom_fields


Expand Down Expand Up @@ -277,8 +278,8 @@ def report_number(self, key, value):
def aleph_number(self, key, value):
"""Translates identifiers: ALEPH.

Attention: 035 might contain aleph number
https://github.com/CERNDocumentServer/cds-migrator-kit/issues/21
Attention: 035 might contain aleph number
https://github.com/CERNDocumentServer/cds-migrator-kit/issues/21
"""
aleph = StringValue(value.get("a")).parse()
if aleph:
Expand All @@ -290,16 +291,19 @@ def aleph_number(self, key, value):
def inspire_number(self, key, value):
"""Translates identifiers.

Attention: might contain aleph number
https://github.com/CERNDocumentServer/cds-migrator-kit/issues/21
Attention: might contain aleph number
https://github.com/CERNDocumentServer/cds-migrator-kit/issues/21
"""
id_value = StringValue(value.get("a")).parse()
scheme = StringValue(value.get("9")).parse()

if scheme.upper() != "INSPIRE":
raise UnexpectedValue(field=key, subfield="9",
message="INSPIRE ID SCHEME MISSING",
priority="warning")
raise UnexpectedValue(
field=key,
subfield="9",
message="INSPIRE ID SCHEME MISSING",
priority="warning",
)

if id_value:
return {"scheme": "inspire", "identifier": id_value}
Loading