Skip to content

Commit

Permalink
feature: add tsm_list_mapper transmutator
Browse files Browse the repository at this point in the history
  • Loading branch information
mutantsan committed Feb 29, 2024
1 parent a13e4db commit 27a30d4
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 26 deletions.
34 changes: 28 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This extension helps to validate and convert data based on a specific schema.

We have a data dict:

```
```json
{
"title": "Test-dataset",
"email": "[email protected]",
Expand Down Expand Up @@ -42,7 +42,7 @@ We have a data dict:

And we want to achieve this:

```
```py
{
"name": "test-dataset",
"email": "[email protected]",
Expand Down Expand Up @@ -152,7 +152,7 @@ There are a few default transmutators you can use in your schema. Of course, you
- `tsm_isodate` - Validates datetime string. Mutates an iso-like string to datetime object.
- `tsm_to_string` - Casts a `field.value` to `str`.
- `tsm_get_nested` - Allows you to pick up a value from a nested structure. Example:
```
```py
data = {"title_translated": [
{"nested_field": {"en": "en title", "ar": "العنوان ar"}},
]}
Expand All @@ -170,7 +170,7 @@ schema = ...
This will take the value for the `title` field from the `title_translated` field. Because `title_translated` is an array with nested objects, we use the `tsm_get_nested` transmutator to retrieve the value from it.

- `tsm_trim_string` - Trim string with max length. Example to trim `hello world` to `hello`:
```
```py
data = {"field_name": "hello world"}

schema = ...
Expand All @@ -182,7 +182,7 @@ schema = ...
...
```
- `tsm_concat` - Concatenate strings. Use `$self` to refer to the field's own value. Example:
```
```py
data = {"id": "dataset-1"}

schema = ...
Expand Down Expand Up @@ -211,7 +211,7 @@ Map a value to another value. The current value must serve as a key within the m

The default value to be used when the key is not found in the mapping. If the default value is not provided, the current value will be used as is.

```
```py
data = {"language": "English"}

schema = ...
Expand All @@ -227,6 +227,28 @@ schema = ...
...
```

- `tsm_list_mapper` - Map each item of a list value using the mapping dict

Works like `tsm_mapper`, but for a list of values. It doesn't accept a `default` value. The optional `remove` argument, passed after the mapping, must be `True` or `False`.

If `remove` is set to `True`, values that don't have a corresponding mapping are removed from the list. Defaults to `False`.

```py
data = {"topic": ["Health", "Military", "Utilities"]}

schema = ...
"topic": {
"validators": [
[
"tsm_list_mapper",
{"Military": "Army", "Utilities": "Utility"},
True,
]
],
},
...
```

### Keywords
1. `map_to` (`str`) - changes the `field.name` in result dict.
2. `validators` (`list[str]`) - a list of transmutators that will be applied to a `field.value`. A transmutator could be a `string` or a `list` where the first item must be transmutator name and others are arbitrary values. Example:
Expand Down
122 changes: 103 additions & 19 deletions ckanext/transmute/tests/test_transmutators.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,28 @@ def test_transmute_validator_without_args(self):

assert e.value.error == "Arguments for validator weren't provided"

@pytest.mark.parametrize("default", [False, 0, "", [], {}, None])
def test_default_allows_falsy_values(self, default):
    """A falsy ``default`` (False, 0, "", [], {}, None) is still applied.

    The input data is empty, so the transmuted result is expected to
    contain only ``field_name`` set to the parametrized default.
    """
    field_definition = {"default": default}
    schema = build_schema({"field_name": field_definition})

    transmuted = call_action(
        "tsm_transmute", data={}, schema=schema, root="Dataset"
    )

    expected = {"field_name": default}
    assert transmuted == expected


@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets")
class TestTrimStringTransmutator:
def test_trim_string_transmutator(self):
data: dict[str, Any] = {
"field_name": "hello world",
Expand Down Expand Up @@ -103,6 +125,9 @@ def test_trim_string_transmutator_with_not_integer_length(self):
root="Dataset",
)


@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets")
class TestConcatTransmutator:
def test_concat_transmutator_with_self(self):
data: dict[str, Any] = {
"identifier": "right-to-the-night-results",
Expand Down Expand Up @@ -288,6 +313,9 @@ def test_concat_transmutator_with_field_link_nested(self):
for res in result["resources"]:
assert res["title"] == f"{result['title']} {res['format'].upper()}"


@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets")
class TestUniqueOnlyTransmutator:
def test_unique_only(self):
"""You can skip using $self if you want for some reason"""
data: dict[str, Any] = {"field_name": [1, 2, 3, 3, 4, 5, 6, 6]}
Expand Down Expand Up @@ -349,25 +377,6 @@ def test_unique_only_empty_list(self):

assert result["field_name"] == []

@pytest.mark.parametrize("default", [False, 0, "", [], {}, None])
def test_default_allows_falsy_values(self, default):
    """A falsy ``default`` (False, 0, "", [], {}, None) is still applied.

    The input data is empty, so the transmuted result is expected to
    contain only ``field_name`` set to the parametrized default.
    """
    field_definition = {"default": default}
    schema = build_schema({"field_name": field_definition})

    transmuted = call_action(
        "tsm_transmute", data={}, schema=schema, root="Dataset"
    )

    expected = {"field_name": default}
    assert transmuted == expected


@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets")
class TestMapperTransmutator:
Expand Down Expand Up @@ -459,3 +468,78 @@ def test_mapper_without_default(self):
)

assert result["language"] == "ua"


@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets")
class TestListMapperTransmutator:
    """Checks for the ``tsm_list_mapper`` transmutator, run via ``tsm_transmute``."""

    @staticmethod
    def _topics():
        """Return a fresh input dict holding the three sample topics."""
        payload: dict[str, Any] = {"topic": ["Health", "Military", "Utilities"]}
        return payload

    @staticmethod
    def _topic_schema(*validator_args):
        """Build a schema whose ``topic`` field runs ``tsm_list_mapper``.

        ``validator_args`` are placed after the transmutator name in the
        validator definition.
        """
        validator = ["tsm_list_mapper", *validator_args]
        return build_schema({"topic": {"validators": [validator]}})

    @staticmethod
    def _transmute(payload, schema):
        """Invoke the ``tsm_transmute`` action with the ``Dataset`` root."""
        return call_action(
            "tsm_transmute", data=payload, schema=schema, root="Dataset"
        )

    def test_list_mapper_with_mapped_value(self):
        """Mapped items are replaced; unmapped ones stay when ``remove`` is omitted."""
        payload = self._topics()
        schema = self._topic_schema({"Military": "Army", "Utilities": "Utility"})

        transmuted = self._transmute(payload, schema)

        assert transmuted["topic"] == ["Health", "Army", "Utility"]

    def test_list_mapper_with_remove(self):
        """With ``remove`` passed as True, items without a mapping are dropped."""
        payload = self._topics()
        schema = self._topic_schema(
            {"Military": "Army", "Utilities": "Utility"}, True
        )

        transmuted = self._transmute(payload, schema)

        assert transmuted["topic"] == ["Army", "Utility"]

    def test_list_mapper_without_mapping(self):
        """Using the transmutator with no arguments raises ``TransmutatorError``."""
        payload = self._topics()
        # The schema is built outside the ``raises`` block so that only the
        # action call itself is expected to fail.
        schema = self._topic_schema()

        with pytest.raises(TransmutatorError) as e:
            self._transmute(payload, schema)

        assert e.value.error == "Arguments for validator weren't provided"
37 changes: 36 additions & 1 deletion ckanext/transmute/transmutators.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ def unique_only(field: Field) -> Field:


@transmutator
def mapper(field: Field, mapping: dict[Any, Any], default: Optional[Any] = None) -> Field:
def mapper(
field: Field, mapping: dict[Any, Any], default: Optional[Any] = None
) -> Field:
"""Map a value with a new value. The initial value must serve as a key within
a mapping dictionary, while the dict value will represent the updated value.
Expand All @@ -237,3 +239,36 @@ def mapper(field: Field, mapping: dict[Any, Any], default: Optional[Any] = None)
field.value = new_value

return field


@transmutator
def list_mapper(
    field: Field,
    mapping: dict[Any, Any],
    remove: Optional[bool] = False,
) -> Field:
    """Map the items of a list value through a mapping dictionary.

    Every item of ``field.value`` that is a key of ``mapping`` is replaced
    by the mapped value. Items that are not keys of ``mapping`` are kept
    unchanged, or dropped when ``remove`` is truthy.

    Whether an item "has a mapping" is decided by key membership, not by the
    truthiness of the mapped value, so targets such as ``0``, ``""``,
    ``False`` or ``None`` are applied instead of being treated as missing.

    Args:
        field (Field): Field object
        mapping (dict[Any, Any]): A dictionary representing the mapping of values.
        remove (bool, optional): If set to True, removes values from the list if
            they don't have a corresponding mapping. Defaults to False.

    Returns:
        Field: the same field object; ``field.value`` is replaced by the
        mapped list. A non-list value is left untouched.
    """
    if not isinstance(field.value, list):
        return field

    result = []

    for value in field.value:
        if value in mapping:
            result.append(mapping[value])
        elif not remove:
            # No mapping for this item: keep it as is unless removal was requested.
            result.append(value)

    field.value = result

    return field

0 comments on commit 27a30d4

Please sign in to comment.