
feat: add weight to fields
smotornyuk committed Oct 7, 2024
1 parent 30cc544 commit 963f36f
Showing 6 changed files with 64 additions and 118 deletions.
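For context, here is a minimal, hypothetical sketch of how the new `weight` keyword introduced by this commit could be used. The field names, values, and the `tsm_string_only` transmutator are placeholders; the action name `tsm_transmute` is assumed. Since `mutate_fields` sorts fields by `weight`, lower weights are processed first, which matters when one field derives its value from another (e.g. via `replace_from`).

```
import ckan.plugins.toolkit as tk

# Hypothetical schema; field names and "tsm_string_only" are placeholders.
schema = {
    "root": "Dataset",
    "types": {
        "Dataset": {
            "fields": {
                # processed first (lower weight)
                "title": {"validators": ["tsm_string_only"], "weight": 10},
                # processed second, after "title" has been handled
                "name": {"replace_from": "title", "weight": 20},
            }
        }
    },
}

# Assumed invocation through CKAN's action API.
result = tk.get_action("tsm_transmute")(
    {}, {"data": {"title": "Test Dataset"}, "schema": schema}
)
```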
2 changes: 1 addition & 1 deletion README.md
@@ -250,7 +250,7 @@ schema = ...
```

### Keywords
1. `map_to` (`str`) - changes the `field.name` in result dict.
1. `map` (`str`) - changes the `field.name` in result dict.
2. `validators` (`list[str]`) - a list of transmutators that will be applied to a `field.value`. A transmutator could be a `string` or a `list` where the first item must be transmutator name and others are arbitrary values. Example:
```
...
76 changes: 19 additions & 57 deletions ckanext/transmute/logic/action/get.py
@@ -60,38 +60,32 @@ def _transmute_data(data, definition, root):
if not schema:
return

mutate_old_fields(data, definition, root)
create_new_fields(data, definition, root)
mutate_fields(data, definition, root)


def mutate_old_fields(data, definition, root):
"""Checks all of the data fields and mutate them
according to the provided schema
def _weighten_fields(pair: tuple[str, SchemaField]):
return pair[1].weight

New fields won't be created here, because we are
only traversing the data dictionary

We can't traverse only Data or only Schema, because
otherwise, the user will have to define all of the fields
that could exist in data
def mutate_fields(data: dict[str, Any], definition: SchemaParser, root: str):
"""Checks all of the schema fields and mutate/create them according to the
provided schema.
Args:
data (dict: [str, Any]): a data to mutate
definition (SchemaParser): SchemaParser object
root (str): a root schema type
"""
schema = definition.types[root]

for field_name, value in data.copy().items():
field: SchemaField = schema["fields"].get(field_name)

if not field:
continue

for field_name, field in sorted(schema["fields"].items(), key=_weighten_fields):
if field.remove:
data.pop(field_name)
data.pop(field_name, None)
continue

value = data.get(field_name)

if field.default is not SENTINEL and not value:
data[field.name] = value = field.default

@@ -120,51 +114,19 @@ def mutate_old_fields(data, definition, root):
data[field.name] = value = field.value

if field.is_multiple():
for nested_field in value:
for nested_field in value or []:
_transmute_data(nested_field, definition, field.type)

else:
if field_name not in data and not field.validate_missing:
continue

data[field.name] = _apply_validators(
Field(field.name, value, root, data_ctx.get()), field.validators
)

if field.map_to:
data[field.map_to] = data.pop(field.name)


def create_new_fields(data, definition, root):
"""New fields are going to be created according
to the provided schema
If the defined field is not exist in the data dict
we are going to create it
The newly created field's value could be inherited from
an existing field. This field must be defined in the
schema.
"""
schema = definition.types[root]

for field_name, field in schema["fields"].items():
if field_name in data:
continue

if field.value is not SENTINEL:
data[field_name] = field.value
elif field.default is not SENTINEL:
data[field_name] = field.default

if field.default_from:
data[field_name] = _default_from(data, field)

if field.replace_from:
data[field_name] = _replace_from(data, field)

if field_name not in data:
continue

data[field_name] = _apply_validators(
Field(field_name, data[field_name], root, data_ctx.get()), field.validators
)
if field.map:
data[field.map] = data.pop(field.name, None)


def _default_from(data, field: SchemaField):
@@ -210,7 +172,7 @@ def _get_first_filled(data, external_fields: list[str]):
return field_value


def _apply_validators(field: Field, validators: list[Callable[[Field], Any]]):
def _apply_validators(field: Field, validators: list[str | list[str]]):
"""Applies validators sequentially to the field value
Args:
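As a quick reminder of what the relaxed `_apply_validators` annotation (`list[str | list[str]]`) describes, a field's `validators` entry mixes bare transmutator names with name-plus-arguments lists. The names below are placeholders for illustration only, not a statement about which transmutators ship with the extension.

```
field_meta = {
    "validators": [
        "tsm_string_only",         # bare transmutator name (placeholder)
        ["tsm_trim_string", 100],  # transmutator name followed by its arguments
    ],
}
```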
81 changes: 29 additions & 52 deletions ckanext/transmute/schema.py
@@ -3,48 +3,35 @@
from typing import Any, Optional, Union

import copy
import dataclasses

from ckan.logic.schema import validator_args

from ckan import types
from ckanext.transmute.exception import SchemaParsingError, SchemaFieldError
from ckanext.transmute.utils import SENTINEL


@dataclasses.dataclass
class SchemaField:
def __init__(
self,
*,
name: str,
type_: str,
definition: dict,
map_to: Optional[str] = None,
validators: Optional[list] = None,
multiple: bool = False,
remove: bool = False,
default: Any = SENTINEL,
default_from: Optional[str] = None,
value: Any = SENTINEL,
replace_from: Optional[str] = None,
inherit_mode: Optional[str] = None,
update: bool = False,
):
self.name = name
self.type = type_
self.definition = definition
self.map_to = map_to
self.validators = validators or []
self.multiple = multiple
self.remove = remove
self.default = default
self.default_from = default_from
self.replace_from = replace_from
self.inherit_mode = inherit_mode
self.value = value
self.update = update
name: str
type: str
definition: dict[str, Any]
map: Optional[str] = None
validators: list[Any] = dataclasses.field(default_factory=list)
multiple: bool = False
remove: bool = False
default: Any = SENTINEL
default_from: Optional[str] = None
value: Any = SENTINEL
replace_from: Optional[str] = None
inherit_mode: Optional[str] = "combine"
update: bool = False
validate_missing: bool = False
weight: int = 0

def __repr__(self):
return (
f"<Field name={self.name} map_to={self.map_to}"
f"<Field name={self.name} map={self.map}"
f" type={self.type} multiple={self.multiple}"
f" validators={self.validators}>"
)
@@ -86,14 +73,14 @@ def _get_sibling_field_name(self, field_name: str) -> Optional[Any]:


class SchemaParser:
def __init__(self, schema):
def __init__(self, schema: dict[str, Any]):
self.schema = copy.deepcopy(schema)
self.root_type = self.get_root_type()
self.types = self.parse_types()
self.parse_fields()

def get_root_type(self):
root_type: str = self.schema.get("root")
root_type: str = self.schema.get("root", "")

if not root_type:
raise SchemaParsingError("Schema: root type is missing")
@@ -117,7 +104,7 @@ def parse_fields(self):
)

def _parse_field(
self, field_name: str, field_meta: dict, _type: str
self, field_name: str, field_meta: dict[str, Any], _type: str
) -> SchemaField:
"""Create a SchemaField combining all the
information about field
@@ -131,25 +118,15 @@ def _parse_field(
SchemaField: SchemaField object
"""

return SchemaField(
name=field_name,
type_=field_meta.get("type", _type),
definition=self.types[_type],
map_to=field_meta.get("map", None),
validators=field_meta.get("validators"),
multiple=field_meta.get("multiple", False),
remove=field_meta.get("remove", False),
default=field_meta.get("default", SENTINEL),
default_from=field_meta.get("default_from", None),
value=field_meta.get("value", SENTINEL),
replace_from=field_meta.get("replace_from", None),
inherit_mode=field_meta.get("inherit_mode", "combine"),
update=field_meta.get("update", False),
)
params: dict[str, Any] = dict({"type": _type}, **field_meta)
return SchemaField(name=field_name, definition=self.types[_type], **params)


@validator_args
def transmute_schema(not_missing, default):
def transmute_schema(
not_missing: types.Validator,
default: types.ValidatorFactory,
) -> types.Schema:
return {
"data": [not_missing],
"schema": [not_missing],
@@ -158,7 +135,7 @@ def transmute_schema(not_missing, default):


@validator_args
def validate_schema(not_missing):
def validate_schema(not_missing: types.Validator) -> types.Schema:
return {
"data": [not_missing],
}
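For illustration, with `SchemaField` now a plain dataclass, a field can be constructed directly instead of going through the old keyword-only `__init__`; the values below are made up:

```
from ckanext.transmute.schema import SchemaField

field = SchemaField(
    name="notes",
    type="str",
    definition={},        # normally the parsed type definition from the schema
    map="description",    # replaces the old `map_to` keyword
    weight=5,             # new in this commit: lower weights are processed first
)
print(field)
# <Field name=notes map=description type=str multiple=False validators=[]>
```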
6 changes: 3 additions & 3 deletions ckanext/transmute/types.py
@@ -1,18 +1,18 @@
from __future__ import annotations

import dataclasses
from typing import Any
from typing_extensions import TypedDict

from recordclass import RecordClass


class TransmuteData(TypedDict):
data: dict[str, Any]
schema: dict[str, Any]
root: str


class Field(RecordClass):
@dataclasses.dataclass
class Field:
field_name: str
value: Any
type: str
2 changes: 1 addition & 1 deletion ckanext/transmute/utils.py
@@ -9,7 +9,7 @@
from ckanext.transmute.interfaces import ITransmute
from ckanext.transmute.types import MODE_COMBINE, MODE_FIRST_FILLED

SENTINEL = {}
SENTINEL = object()
_transmutator_cache = {}
_schema_cache = {}

15 changes: 11 additions & 4 deletions setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = ckanext-transmute
version = 1.6.0
version = 2.0.0a0
description = Converts a dataset based on a specific schema
long_description = file: README.md
long_description_content_type = text/markdown
@@ -11,24 +11,31 @@ license = AGPL
classifiers =
Development Status :: 4 - Beta
License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
Programming Language :: Python :: 2.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: 3.13
Programming Language :: Python :: 3.14
keywords =
CKAN
scheming
schema

[options]
python_requires = >= 3.8
packages = find:
namespace_packages = ckanext
install_requires =
ckanext-scheming
typing-extensions
recordclass
include_package_data = True,
include_package_data = True

[options.entry_points]
ckan.plugins =
transmute = ckanext.transmute.plugin:TransmutePlugin

babel.extractors =
ckan = ckan.lib.extract:extract_ckan
