diff --git a/invenio.cfg b/invenio.cfg index 2292e71..a250828 100644 --- a/invenio.cfg +++ b/invenio.cfg @@ -19,8 +19,8 @@ from cds_rdm.permissions import ( CDSRDMPreservationSyncPermissionPolicy, ) from cds_rdm.files import storage_factory -from invenio_app_rdm.config import CELERY_BEAT_SCHEDULE as APP_RDM_CELERY_BEAT_SCHEDULE from celery.schedules import crontab +from invenio_app_rdm.config import STATS_EVENTS as _APP_RDM_STATS_EVENTS from invenio_vocabularies.services.custom_fields import VocabularyCF from invenio_records_resources.services.custom_fields import KeywordCF from invenio_rdm_records.config import ( @@ -550,3 +550,11 @@ VOCABULARIES_NAMES_SCHEMES = { "lcds": {"label": _("CDS"), "validator": schemes.is_legacy_cds, "datacite": "CDS"}, } """Names allowed identifier schemes.""" + + +# Invenio Stats +# ============= + +# We override the index templates (in place, on the dict imported from invenio-app-rdm) to add the new fields needed for the migrated statistics events +_APP_RDM_STATS_EVENTS["file-download"]["templates"] = "cds_rdm.stats.templates.events.file_download" +_APP_RDM_STATS_EVENTS["record-view"]["templates"] = "cds_rdm.stats.templates.events.record_view" diff --git a/site/cds_rdm/stats/__init__.py b/site/cds_rdm/stats/__init__.py new file mode 100644 index 0000000..18630a0 --- /dev/null +++ b/site/cds_rdm/stats/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-RDM is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. + +"""CDS-RDM statistics.""" diff --git a/site/cds_rdm/stats/templates/__init__.py b/site/cds_rdm/stats/templates/__init__.py new file mode 100644 index 0000000..18630a0 --- /dev/null +++ b/site/cds_rdm/stats/templates/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-RDM is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details.
+ +"""CDS-RDM Statistics search index templates.""" diff --git a/site/cds_rdm/stats/templates/events/__init__.py b/site/cds_rdm/stats/templates/events/__init__.py new file mode 100644 index 0000000..5081b75 --- /dev/null +++ b/site/cds_rdm/stats/templates/events/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-RDM is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. + +"""Statistics events search index templates.""" diff --git a/site/cds_rdm/stats/templates/events/file_download/__init__.py b/site/cds_rdm/stats/templates/events/file_download/__init__.py new file mode 100644 index 0000000..e2b7914 --- /dev/null +++ b/site/cds_rdm/stats/templates/events/file_download/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-RDM is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. + +"""File download event search index templates.""" diff --git a/site/cds_rdm/stats/templates/events/file_download/os-v2/__init__.py b/site/cds_rdm/stats/templates/events/file_download/os-v2/__init__.py new file mode 100644 index 0000000..a1f6ef6 --- /dev/null +++ b/site/cds_rdm/stats/templates/events/file_download/os-v2/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-RDM is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. 
+ +"""File download event OpenSearch index templates.""" diff --git a/site/cds_rdm/stats/templates/events/file_download/os-v2/file-download-v1.0.0.json b/site/cds_rdm/stats/templates/events/file_download/os-v2/file-download-v1.0.0.json new file mode 100644 index 0000000..034905d --- /dev/null +++ b/site/cds_rdm/stats/templates/events/file_download/os-v2/file-download-v1.0.0.json @@ -0,0 +1,108 @@ +{ + "index_patterns": ["__SEARCH_INDEX_PREFIX__events-stats-file-download-*"], + "settings": { + "index": { + "refresh_interval": "5s" + } + }, + "mappings": { + "_meta": { + "field_descriptions": { + "is_lcds": "This field marks all statistical events that have been migrated from the legacy CDS system.", + "before_COUNTER": "This field applies to all migrated events where no information was available to determine whether they were human or robot events. This was later resolved with the implementation of a proper robot-checking mechanism, ensuring COUNTER compliance." + } + }, + "dynamic_templates": [ + { + "date_fields": { + "match_mapping_type": "date", + "mapping": { + "type": "date", + "format": "strict_date_hour_minute_second" + } + } + } + ], + "date_detection": false, + "dynamic": "strict", + "numeric_detection": false, + "properties": { + "timestamp": { + "type": "date", + "format": "strict_date_hour_minute_second" + }, + "bucket_id": { + "type": "keyword" + }, + "file_id": { + "type": "keyword" + }, + "file_key": { + "type": "keyword" + }, + "unique_id": { + "type": "keyword" + }, + "country": { + "type": "keyword" + }, + "visitor_id": { + "type": "keyword" + }, + "is_machine": { + "type": "boolean" + }, + "is_robot": { + "type": "boolean" + }, + "unique_session_id": { + "type": "keyword" + }, + "size": { + "type": "double" + }, + "referrer": { + "type": "keyword" + }, + "ip_address": { + "type": "keyword" + }, + "user_agent": { + "type": "keyword" + }, + "user_id": { + "type": "keyword" + }, + "session_id": { + "type": "keyword" + }, + "record_id": { + "type": "keyword" + }, + "recid": { + "type": "keyword" + }, + "parent_id": { + "type": "keyword" + }, + "parent_recid": { + "type": "keyword" + }, + "via_api": { + "type": "boolean" + }, + "is_lcds": { + "type": "boolean"
+ }, + "before_COUNTER": { + "type": "boolean" + }, + "updated_timestamp": { + "type": "date" + } + } + }, + "aliases": { + "__SEARCH_INDEX_PREFIX__events-stats-file-download": {} + } +} diff --git a/site/cds_rdm/stats/templates/events/record_view/__init__.py b/site/cds_rdm/stats/templates/events/record_view/__init__.py new file mode 100644 index 0000000..b9a17c1 --- /dev/null +++ b/site/cds_rdm/stats/templates/events/record_view/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-RDM is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. + +"""Record views search index templates.""" diff --git a/site/cds_rdm/stats/templates/events/record_view/os-v2/__init__.py b/site/cds_rdm/stats/templates/events/record_view/os-v2/__init__.py new file mode 100644 index 0000000..7f29500 --- /dev/null +++ b/site/cds_rdm/stats/templates/events/record_view/os-v2/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-RDM is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details.
+ +"""Record view event OpenSearch index templates.""" diff --git a/site/cds_rdm/stats/templates/events/record_view/os-v2/record-view-v1.0.0.json b/site/cds_rdm/stats/templates/events/record_view/os-v2/record-view-v1.0.0.json new file mode 100644 index 0000000..66f9403 --- /dev/null +++ b/site/cds_rdm/stats/templates/events/record_view/os-v2/record-view-v1.0.0.json @@ -0,0 +1,88 @@ +{ + "index_patterns": ["__SEARCH_INDEX_PREFIX__events-stats-record-view-*"], + "settings": { + "index": { + "refresh_interval": "5s" + } + }, + "mappings": { + "_meta": { + "field_descriptions": { + "is_lcds": "This field marks all statistical events that have been migrated from the legacy CDS system.", + "before_COUNTER": "This field applies to all migrated events where no information was available to determine whether they were human or robot events. This was later resolved with the implementation of a proper robot-checking mechanism, ensuring COUNTER compliance." + } + }, + "date_detection": false, + "dynamic": "strict", + "numeric_detection": false, + "properties": { + "timestamp": { + "type": "date", + "format": "strict_date_hour_minute_second" + }, + "labels": { + "type": "keyword" + }, + "country": { + "type": "keyword" + }, + "visitor_id": { + "type": "keyword" + }, + "is_machine": { + "type": "boolean" + }, + "is_robot": { + "type": "boolean" + }, + "unique_id": { + "type": "keyword" + }, + "unique_session_id": { + "type": "keyword" + }, + "referrer": { + "type": "keyword" + }, + "ip_address": { + "type": "keyword" + }, + "user_agent": { + "type": "keyword" + }, + "user_id": { + "type": "keyword" + }, + "session_id": { + "type": "keyword" + }, + "record_id": { + "type": "keyword" + }, + "recid": { + "type": "keyword" + }, + "parent_id": { + "type": "keyword" + }, + "parent_recid": { + "type": "keyword" + }, + "via_api": { + "type": "boolean" + }, + "is_lcds": { + "type": "boolean" + }, + "before_COUNTER": { + "type": "boolean"
+ }, + "updated_timestamp": { + "type": "date" + } + } + }, + "aliases": { + "__SEARCH_INDEX_PREFIX__events-stats-record-view": {} + } +}