
feat: add weight to fields
smotornyuk committed Oct 7, 2024
1 parent 30cc544 commit 963f36f
Showing 6 changed files with 64 additions and 118 deletions.
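For context, here is a minimal, hypothetical sketch of how the new `weight` keyword introduced by this commit could be used. The field names, values, and the `tsm_string_only` transmutator are placeholders; the action name `tsm_transmute` is assumed. Since `mutate_fields` sorts fields by `weight`, lower weights are processed first, which matters when one field derives its value from another (e.g. via `replace_from`).

```
import ckan.plugins.toolkit as tk

# Hypothetical schema; field names and "tsm_string_only" are placeholders.
schema = {
    "root": "Dataset",
    "types": {
        "Dataset": {
            "fields": {
                # processed first (lower weight)
                "title": {"validators": ["tsm_string_only"], "weight": 10},
                # processed second, after "title" has been handled
                "name": {"replace_from": "title", "weight": 20},
            }
        }
    },
}

# Assumed invocation through CKAN's action API.
result = tk.get_action("tsm_transmute")(
    {}, {"data": {"title": "Test Dataset"}, "schema": schema}
)
```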
2 changes: 1 addition & 1 deletion README.md
@@ -250,7 +250,7 @@ schema = ...
```

### Keywords
1. `map_to` (`str`) - changes the `field.name` in result dict.
1. `map` (`str`) - changes the `field.name` in result dict.
2. `validators` (`list[str]`) - a list of transmutators that will be applied to a `field.value`. A transmutator could be a `string` or a `list` where the first item must be transmutator name and others are arbitrary values. Example:
```
...
76 changes: 19 additions & 57 deletions ckanext/transmute/logic/action/get.py
@@ -60,38 +60,32 @@ def _transmute_data(data, definition, root):
if not schema:
return

mutate_old_fields(data, definition, root)
create_new_fields(data, definition, root)
mutate_fields(data, definition, root)


def mutate_old_fields(data, definition, root):
"""Checks all of the data fields and mutate them
according to the provided schema
def _weighten_fields(pair: tuple[str, SchemaField]):
return pair[1].weight

New fields won't be created here, because we are
only traversing the data dictionary

We can't traverse only Data or only Schema, because
otherwise, the user will have to define all of the fields
that could exist in data
def mutate_fields(data: dict[str, Any], definition: SchemaParser, root: str):
"""Checks all of the schema fields and mutate/create them according to the
provided schema.
Args:
data (dict: [str, Any]): a data to mutate
definition (SchemaParser): SchemaParser object
root (str): a root schema type
"""
schema = definition.types[root]

for field_name, value in data.copy().items():
field: SchemaField = schema["fields"].get(field_name)

if not field:
continue

for field_name, field in sorted(schema["fields"].items(), key=_weighten_fields):
if field.remove:
data.pop(field_name)
data.pop(field_name, None)
continue

value = data.get(field_name)

if field.default is not SENTINEL and not value:
data[field.name] = value = field.default

@@ -120,51 +114,19 @@ def mutate_old_fields(data, definition, root):
data[field.name] = value = field.value

if field.is_multiple():
for nested_field in value:
for nested_field in value or []:
_transmute_data(nested_field, definition, field.type)

else:
if field_name not in data and not field.validate_missing:
continue

data[field.name] = _apply_validators(
Field(field.name, value, root, data_ctx.get()), field.validators
)

if field.map_to:
data[field.map_to] = data.pop(field.name)


def create_new_fields(data, definition, root):
"""New fields are going to be created according
to the provided schema
If the defined field is not exist in the data dict
we are going to create it
The newly created field's value could be inherited from
an existing field. This field must be defined in the
schema.
"""
schema = definition.types[root]

for field_name, field in schema["fields"].items():
if field_name in data:
continue

if field.value is not SENTINEL:
data[field_name] = field.value
elif field.default is not SENTINEL:
data[field_name] = field.default

if field.default_from:
data[field_name] = _default_from(data, field)

if field.replace_from:
data[field_name] = _replace_from(data, field)

if field_name not in data:
continue

data[field_name] = _apply_validators(
Field(field_name, data[field_name], root, data_ctx.get()), field.validators
)
if field.map:
data[field.map] = data.pop(field.name, None)


def _default_from(data, field: SchemaField):
@@ -210,7 +172,7 @@ def _get_first_filled(data, external_fields: list[str]):
return field_value


def _apply_validators(field: Field, validators: list[Callable[[Field], Any]]):
def _apply_validators(field: Field, validators: list[str | list[str]]):
"""Applies validators sequentially to the field value
Args:
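As a quick reminder of what the relaxed `_apply_validators` annotation (`list[str | list[str]]`) describes, a field's `validators` entry mixes bare transmutator names with name-plus-arguments lists. The names below are placeholders for illustration only, not a statement about which transmutators ship with the extension.

```
field_meta = {
    "validators": [
        "tsm_string_only",         # bare transmutator name (placeholder)
        ["tsm_trim_string", 100],  # transmutator name followed by its arguments
    ],
}
```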
81 changes: 29 additions & 52 deletions ckanext/transmute/schema.py
@@ -3,48 +3,35 @@
from typing import Any, Optional, Union

import copy
import dataclasses

from ckan.logic.schema import validator_args

from ckan import types
from ckanext.transmute.exception import SchemaParsingError, SchemaFieldError
from ckanext.transmute.utils import SENTINEL


@dataclasses.dataclass
class SchemaField:
def __init__(
self,
*,
name: str,
type_: str,
definition: dict,
map_to: Optional[str] = None,
validators: Optional[list] = None,
multiple: bool = False,
remove: bool = False,
default: Any = SENTINEL,
default_from: Optional[str] = None,
value: Any = SENTINEL,
replace_from: Optional[str] = None,
inherit_mode: Optional[str] = None,
update: bool = False,
):
self.name = name
self.type = type_
self.definition = definition
self.map_to = map_to
self.validators = validators or []
self.multiple = multiple
self.remove = remove
self.default = default
self.default_from = default_from
self.replace_from = replace_from
self.inherit_mode = inherit_mode
self.value = value
self.update = update
name: str
type: str
definition: dict[str, Any]
map: Optional[str] = None
validators: list[Any] = dataclasses.field(default_factory=list)
multiple: bool = False
remove: bool = False
default: Any = SENTINEL
default_from: Optional[str] = None
value: Any = SENTINEL
replace_from: Optional[str] = None
inherit_mode: Optional[str] = "combine"
update: bool = False
validate_missing: bool = False
weight: int = 0

def __repr__(self):
return (
f"<Field name={self.name} map_to={self.map_to}"
f"<Field name={self.name} map={self.map}"
f" type={self.type} multiple={self.multiple}"
f" validators={self.validators}>"
)
@@ -86,14 +73,14 @@ def _get_sibling_field_name(self, field_name: str) -> Optional[Any]:


class SchemaParser:
def __init__(self, schema):
def __init__(self, schema: dict[str, Any]):
self.schema = copy.deepcopy(schema)
self.root_type = self.get_root_type()
self.types = self.parse_types()
self.parse_fields()

def get_root_type(self):
root_type: str = self.schema.get("root")
root_type: str = self.schema.get("root", "")

if not root_type:
raise SchemaParsingError("Schema: root type is missing")
@@ -117,7 +104,7 @@ def parse_fields(self):
)

def _parse_field(
self, field_name: str, field_meta: dict, _type: str
self, field_name: str, field_meta: dict[str, Any], _type: str
) -> SchemaField:
"""Create a SchemaField combining all the
information about field
@@ -131,25 +118,15 @@ def _parse_field(
SchemaField: SchemaField object
"""

return SchemaField(
name=field_name,
type_=field_meta.get("type", _type),
definition=self.types[_type],
map_to=field_meta.get("map", None),
validators=field_meta.get("validators"),
multiple=field_meta.get("multiple", False),
remove=field_meta.get("remove", False),
default=field_meta.get("default", SENTINEL),
default_from=field_meta.get("default_from", None),
value=field_meta.get("value", SENTINEL),
replace_from=field_meta.get("replace_from", None),
inherit_mode=field_meta.get("inherit_mode", "combine"),
update=field_meta.get("update", False),
)
params: dict[str, Any] = dict({"type": _type}, **field_meta)
return SchemaField(name=field_name, definition=self.types[_type], **params)


@validator_args
def transmute_schema(not_missing, default):
def transmute_schema(
not_missing: types.Validator,
default: types.ValidatorFactory,
) -> types.Schema:
return {
"data": [not_missing],
"schema": [not_missing],
@@ -158,7 +135,7 @@ def transmute_schema(not_missing, default):


@validator_args
def validate_schema(not_missing):
def validate_schema(not_missing: types.Validator) -> types.Schema:
return {
"data": [not_missing],
}
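For illustration, with `SchemaField` now a plain dataclass, a field can be constructed directly instead of going through the old keyword-only `__init__`; the values below are made up:

```
from ckanext.transmute.schema import SchemaField

field = SchemaField(
    name="notes",
    type="str",
    definition={},        # normally the parsed type definition from the schema
    map="description",    # replaces the old `map_to` keyword
    weight=5,             # new in this commit: lower weights are processed first
)
print(field)
# <Field name=notes map=description type=str multiple=False validators=[]>
```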
6 changes: 3 additions & 3 deletions ckanext/transmute/types.py
@@ -1,18 +1,18 @@
from __future__ import annotations

import dataclasses
from typing import Any
from typing_extensions import TypedDict

from recordclass import RecordClass


class TransmuteData(TypedDict):
data: dict[str, Any]
schema: dict[str, Any]
root: str


class Field(RecordClass):
@dataclasses.dataclass
class Field:
field_name: str
value: Any
type: str
2 changes: 1 addition & 1 deletion ckanext/transmute/utils.py
@@ -9,7 +9,7 @@
from ckanext.transmute.interfaces import ITransmute
from ckanext.transmute.types import MODE_COMBINE, MODE_FIRST_FILLED

SENTINEL = {}
SENTINEL = object()
_transmutator_cache = {}
_schema_cache = {}

15 changes: 11 additions & 4 deletions setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = ckanext-transmute
version = 1.6.0
version = 2.0.0a0
description = Converts a dataset based on a specific schema
long_description = file: README.md
long_description_content_type = text/markdown
@@ -11,24 +11,31 @@ license = AGPL
classifiers =
Development Status :: 4 - Beta
License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
Programming Language :: Python :: 2.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: 3.13
Programming Language :: Python :: 3.14
keywords =
CKAN
scheming
schema

[options]
python_requires = >= 3.8
packages = find:
namespace_packages = ckanext
install_requires =
ckanext-scheming
typing-extensions
recordclass
include_package_data = True,
include_package_data = True

[options.entry_points]
ckan.plugins =
transmute = ckanext.transmute.plugin:TransmutePlugin

babel.extractors =
ckan = ckan.lib.extract:extract_ckan
