From 27a30d413e8ed9bc5451fa07e24575ebc7cb6e86 Mon Sep 17 00:00:00 2001 From: mutantsan Date: Thu, 29 Feb 2024 13:41:39 +0200 Subject: [PATCH] feature: add tsm_list_mapper transmutator --- README.md | 34 ++++- ckanext/transmute/tests/test_transmutators.py | 122 +++++++++++++++--- ckanext/transmute/transmutators.py | 37 +++++- 3 files changed, 167 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index be427ab..a19b04e 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This extension helps to validate and convert data based on a specific schema. We have a data dict: -``` +```json { "title": "Test-dataset", "email": "test@test.ua", @@ -42,7 +42,7 @@ We have a data dict: And we want to achieve this: -``` +```py { "name": "test-dataset", "email": "test@test.ua", @@ -152,7 +152,7 @@ There are a few default transmutators you can use in your schema. Of course, you - `tsm_isodate` - Validates datetime string. Mutates an iso-like string to datetime object. - `tsm_to_string` - Casts a `field.value` to `str`. - `tsm_get_nested` - Allows you to pick up a value from a nested structure. Example: -``` +```py data = "title_translated": [ {"nested_field": {"en": "en title", "ar": "العنوان ar"}}, ] @@ -170,7 +170,7 @@ schema = ... This will take a value for a `title` field from `title_translated` field. Because `title_translated` is an array with nested objects, we are using the `tsm_get_nested` transmutator to achieve the value from it. - `tsm_trim_string` - Trim string with max length. Example to trim `hello world` to `hello`: -``` +```py data = {"field_name": "hello world} schema = ... @@ -182,7 +182,7 @@ schema = ... ... ``` - `tsm_concat` - Concatenate strings. Use `$self` to point on field value. Example: -``` +```py data = {"id": "dataset-1"} schema = ... @@ -211,7 +211,7 @@ Map a value to another value. The current value must serve as a key within the m The default value to be used when the key is not found in the mapping. 
If the default value is not provided, the current value will be used as it. -``` +```py data = {"language": "English"} schema = ... @@ -227,6 +227,28 @@ schema = ... ... ``` +- `tsm_list_mapper` - Map each value of a list using the mapping dict + +Works like `tsm_mapper`, but for list values. Doesn't have a `default` value. The third argument, `remove`, must be `True` or `False`. + +If `remove` is set to `True`, values that don't have a corresponding mapping are removed from the list. Defaults to `False`. + +```py +data = {"topic": ["Health", "Military", "Utilities"]} + +schema = ... + "topic": { + "validators": [ + [ + "tsm_list_mapper", + {"Military": "Army", "Utilities": "Utility"}, + True, + ] + ], + }, + ... +``` + ### Keywords 1. `map_to` (`str`) - changes the `field.name` in result dict. 2. `validators` (`list[str]`) - a list of transmutators that will be applied to a `field.value`. A transmutator could be a `string` or a `list` where the first item must be transmutator name and others are arbitrary values. Example: diff --git a/ckanext/transmute/tests/test_transmutators.py b/ckanext/transmute/tests/test_transmutators.py index 00994e3..29114b2 100644 --- a/ckanext/transmute/tests/test_transmutators.py +++ b/ckanext/transmute/tests/test_transmutators.py @@ -33,6 +33,28 @@ def test_transmute_validator_without_args(self): assert e.value.error == "Arguments for validator weren't provided" + @pytest.mark.parametrize("default", [False, 0, "", [], {}, None]) + def test_default_allows_falsy_values(self, default): + """False, 0, "", etc. 
can be used as a default value""" + + tsm_schema = build_schema( + { + "field_name": {"default": default}, + } + ) + + result = call_action( + "tsm_transmute", + data={}, + schema=tsm_schema, + root="Dataset", + ) + + assert result == {"field_name": default} + + +@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") +class TestTrimStringTransmutator: def test_trim_string_transmutator(self): data: dict[str, Any] = { "field_name": "hello world", @@ -103,6 +125,9 @@ def test_trim_string_transmutator_with_not_integer_length(self): root="Dataset", ) + +@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") +class TestConcatTransmutator: def test_concat_transmutator_with_self(self): data: dict[str, Any] = { "identifier": "right-to-the-night-results", @@ -288,6 +313,9 @@ def test_concat_transmutator_with_field_link_nested(self): for res in result["resources"]: assert res["title"] == f"{result['title']} {res['format'].upper()}" + +@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") +class TestUniqueOnlyTransmutator: def test_unique_only(self): """You can skip using $self if you want for some reason""" data: dict[str, Any] = {"field_name": [1, 2, 3, 3, 4, 5, 6, 6]} @@ -349,25 +377,6 @@ def test_unique_only_empty_list(self): assert result["field_name"] == [] - @pytest.mark.parametrize("default", [False, 0, "", [], {}, None]) - def test_default_allows_falsy_values(self, default): - """False, 0, "", etc. 
can be used as a default value""" - - tsm_schema = build_schema( - { - "field_name": {"default": default}, - } - ) - - result = call_action( - "tsm_transmute", - data={}, - schema=tsm_schema, - root="Dataset", - ) - - assert result == {"field_name": default} - @pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") class TestMapperTransmutator: @@ -459,3 +468,78 @@ def test_mapper_without_default(self): ) assert result["language"] == "ua" + + +@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") +class TestListMapperTransmutator: + def test_list_mapper_with_mapped_value(self): + data: dict[str, Any] = {"topic": ["Health", "Military", "Utilities"]} + + tsm_schema = build_schema( + { + "topic": { + "validators": [ + [ + "tsm_list_mapper", + {"Military": "Army", "Utilities": "Utility"}, + ], + ], + }, + } + ) + + result = call_action( + "tsm_transmute", + data=data, + schema=tsm_schema, + root="Dataset", + ) + + assert result["topic"] == ["Health", "Army", "Utility"] + + def test_list_mapper_with_remove(self): + data: dict[str, Any] = {"topic": ["Health", "Military", "Utilities"]} + + tsm_schema = build_schema( + { + "topic": { + "validators": [ + [ + "tsm_list_mapper", + {"Military": "Army", "Utilities": "Utility"}, + True, + ], + ], + }, + } + ) + + result = call_action( + "tsm_transmute", + data=data, + schema=tsm_schema, + root="Dataset", + ) + + assert result["topic"] == ["Army", "Utility"] + + def test_list_mapper_without_mapping(self): + data: dict[str, Any] = {"topic": ["Health", "Military", "Utilities"]} + + tsm_schema = build_schema( + { + "topic": { + "validators": [["tsm_list_mapper"]], + } + } + ) + + with pytest.raises(TransmutatorError) as e: + call_action( + "tsm_transmute", + data=data, + schema=tsm_schema, + root="Dataset", + ) + + assert e.value.error == "Arguments for validator weren't provided" diff --git a/ckanext/transmute/transmutators.py b/ckanext/transmute/transmutators.py index d517d5e..59fd9b3 100644 --- 
a/ckanext/transmute/transmutators.py
+++ b/ckanext/transmute/transmutators.py
@@ -219,7 +219,9 @@ def unique_only(field: Field) -> Field:
 
 
 @transmutator
-def mapper(field: Field, mapping: dict[Any, Any], default: Optional[Any] = None) -> Field:
+def mapper(
+    field: Field, mapping: dict[Any, Any], default: Optional[Any] = None
+) -> Field:
     """Map a value with a new value. The initial value must serve as a key within
     a mapping dictionary, while the dict value will represent the updated value.
 
@@ -237,3 +239,36 @@ def mapper(field: Field, mapping: dict[Any, Any], default: Optional[Any] = None)
     field.value = new_value
 
     return field
+
+
+@transmutator
+def list_mapper(
+    field: Field,
+    mapping: dict[Any, Any],
+    remove: Optional[bool] = False,
+) -> Field:
+    """Map each item of a list value through a mapping dictionary.
+
+    Non-list values are left untouched. Items are matched by key membership,
+    so falsy targets (e.g. 0, "" or False) are valid mapped values.
+
+    Args:
+        field (Field): Field object
+        mapping (dict[Any, Any]): A dictionary representing the mapping of values.
+        remove (bool, optional): If set to True, removes values from the list if
+            they don't have a corresponding mapping. Defaults to False.
+
+    Returns:
+        Field: the same field; a list value is replaced by the mapped list.
+    """
+    if not isinstance(field.value, list):
+        return field
+
+    # Membership test, not truthiness: a falsy mapped value is still a mapping.
+    field.value = [
+        mapping.get(item, item)
+        for item in field.value
+        if not remove or item in mapping
+    ]
+
+    return field