Refactor to take advantage of Pydantic validation #55

Merged 14 commits on Nov 27, 2024
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -22,7 +22,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
os: [ubuntu-latest, windows-latest, macos-latest]

steps:
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -2,7 +2,7 @@
name = "geometamaker"
description = "metadata creation for geospatial data"
readme = "README.md"
-requires-python = ">=3.10,<3.13"
+requires-python = ">=3.9,<3.13"
license = {file = "LICENSE.txt"}
maintainers = [
{name = "Natural Capital Project Software Team"}
@@ -17,6 +17,7 @@ classifiers = [
"Operating System :: Microsoft",
"Operating System :: POSIX",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
1 change: 1 addition & 0 deletions requirements.txt
@@ -8,6 +8,7 @@ GDAL
frictionless
numpy
platformdirs
+Pydantic
pygeoprocessing>=2.4.5
pyyaml
requests
5 changes: 3 additions & 2 deletions src/geometamaker/config.py
@@ -2,6 +2,7 @@
import os

import platformdirs
+from pydantic import ValidationError

from . import models

@@ -35,8 +36,8 @@ def __init__(self, config_path=None):
except FileNotFoundError as err:
LOGGER.debug('config file does not exist', exc_info=err)
pass
-        # an invalid profile should raise a TypeError
-        except TypeError as err:
+        # an invalid profile should raise a ValidationError
+        except ValidationError as err:
LOGGER.warning('', exc_info=err)
LOGGER.warning(
f'{self.config_path} contains an invalid profile. '
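
Note on the except change above: Pydantic model constructors report bad input by raising pydantic.ValidationError rather than TypeError, which is why the config loader now catches that exception. A minimal sketch of the behavior, using a hypothetical Profile model as a stand-in for the real one in geometamaker.models:

```python
from pydantic import BaseModel, ValidationError


# Hypothetical stand-in for the profile model defined in geometamaker.models.
class Profile(BaseModel):
    contact_email: str
    license: str


try:
    # A profile missing required fields no longer surfaces as TypeError;
    # Pydantic aggregates the problems into a single ValidationError.
    Profile(contact_email='team@example.org')
except ValidationError as err:
    print(err)  # lists 'license' as a missing required field
```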
46 changes: 24 additions & 22 deletions src/geometamaker/geometamaker.py
@@ -1,4 +1,3 @@
-import dataclasses
import functools
import hashlib
import logging
@@ -228,7 +227,7 @@ def describe_vector(source_dataset_path, scheme):
fields.append(
models.FieldSchema(name=fld.name, type=fld.GetTypeName()))
vector = layer = None
-    description['schema'] = models.TableSchema(fields=fields)
+    description['data_model'] = models.TableSchema(fields=fields)

info = pygeoprocessing.get_vector_info(source_dataset_path)
bbox = models.BoundingBox(*info['bounding_box'])
@@ -264,7 +263,7 @@ def describe_raster(source_dataset_path, scheme):
gdal_type=gdal.GetDataTypeName(info['datatype']),
numpy_type=numpy.dtype(info['numpy_type']).name,
nodata=info['nodata'][i]))
-    description['schema'] = models.RasterSchema(
+    description['data_model'] = models.RasterSchema(
bands=bands,
pixel_size=info['pixel_size'],
raster_size=info['raster_size'])
@@ -293,7 +292,8 @@ def describe_table(source_dataset_path, scheme):

"""
description = describe_file(source_dataset_path, scheme)
-    description['schema'] = models.TableSchema(**description['schema'])
+    description['data_model'] = models.TableSchema(**description['schema'])
+    del description['schema']  # we forbid extra args in our Pydantic models
return description
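
The new del statement above reflects that the Pydantic resource models reject unrecognized keyword arguments: leaving the stale 'schema' key in the description would make the later RESOURCE_MODELS[resource_type](**description) call fail validation. A sketch of how that strictness is typically configured in Pydantic v2 (the model name and exact settings in geometamaker.models are assumptions here):

```python
from pydantic import BaseModel, ConfigDict, ValidationError


# Hypothetical, simplified stand-in for a geometamaker resource model.
class TableResource(BaseModel):
    # Assumed configuration: extra='forbid' turns unexpected keyword
    # arguments into a validation error instead of silently ignoring them.
    model_config = ConfigDict(extra='forbid')
    data_model: dict = {}


try:
    # Passing the leftover 'schema' key would fail, which is why
    # describe_table() deletes it before the resource is constructed.
    TableResource(data_model={}, schema={})
except ValidationError as err:
    print(err)  # reports 'schema' as an extra input that is not permitted
```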


@@ -354,46 +354,48 @@ def describe(source_dataset_path, profile=None):
# Load existing metadata file
try:
existing_resource = RESOURCE_MODELS[resource_type].load(metadata_path)
-        if 'schema' in description:
-            if isinstance(description['schema'], models.RasterSchema):
-                # If existing band metadata still matches schema of the file
-                # carry over metadata from the existing file because it could
-                # include human-defined properties.
+        if 'data_model' in description:
+            if isinstance(description['data_model'], models.RasterSchema):
+                # If existing band metadata still matches data_model of the file
+                # carry over existing metadata because it could include
+                # human-defined properties.
new_bands = []
-                for band in description['schema'].bands:
+                for band in description['data_model'].bands:
try:
eband = existing_resource.get_band_description(band.index)
# TODO: rewrite this as __eq__ of BandSchema?
if (band.numpy_type, band.gdal_type, band.nodata) == (
eband.numpy_type, eband.gdal_type, eband.nodata):
-                            band = dataclasses.replace(band, **eband.__dict__)
+                            updated_dict = band.model_dump() | eband.model_dump()
+                            band = models.BandSchema(**updated_dict)
Review comment (Member): Fun! I was not aware that you could bitwise-or two dicts to take their union.

except IndexError:
pass
new_bands.append(band)
-                description['schema'].bands = new_bands
-            if isinstance(description['schema'], models.TableSchema):
-                # If existing field metadata still matches schema of the file
-                # carry over metadata from the existing file because it could
-                # include human-defined properties.
+                description['data_model'].bands = new_bands
+            if isinstance(description['data_model'], models.TableSchema):
+                # If existing field metadata still matches data_model of the file
+                # carry over existing metadata because it could include
+                # human-defined properties.
new_fields = []
-                for field in description['schema'].fields:
+                for field in description['data_model'].fields:
try:
efield = existing_resource.get_field_description(
field.name)
# TODO: rewrite this as __eq__ of FieldSchema?
if field.type == efield.type:
-                            field = dataclasses.replace(field, **efield.__dict__)
+                            updated_dict = field.model_dump() | efield.model_dump()
+                            field = models.FieldSchema(**updated_dict)
except KeyError:
pass
new_fields.append(field)
-                description['schema'].fields = new_fields
+                description['data_model'].fields = new_fields
# overwrite properties that are intrinsic to the dataset
-        resource = dataclasses.replace(
-            existing_resource, **description)
+        updated_dict = existing_resource.model_dump() | description
+        resource = RESOURCE_MODELS[resource_type](**updated_dict)

# Common path: metadata file does not already exist
# Or less common, ValueError if it exists but is incompatible
-    except (FileNotFoundError, ValueError):
+    except FileNotFoundError:
resource = RESOURCE_MODELS[resource_type](**description)

resource = resource.replace(user_profile)
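
On the dataclasses.replace() to model_dump() changes above (including the dict union the review comment calls out): since PEP 584, dict | dict builds a new dict in which keys from the right-hand operand win, so dumping two models and re-validating their union is a concise way to overlay existing, possibly human-edited metadata onto freshly described values. A rough illustration with a hypothetical, simplified stand-in for models.BandSchema:

```python
from pydantic import BaseModel


# Hypothetical, simplified stand-in for models.BandSchema.
class Band(BaseModel):
    index: int
    gdal_type: str = ''
    description: str = ''


fresh = Band(index=1, gdal_type='Float32')                # described from the file on disk
existing = Band(index=1, gdal_type='Float32',
                description='elevation above sea level')  # carries human-edited metadata

# PEP 584 dict union: right-hand keys win, so values from the existing
# metadata overwrite the freshly generated ones, and constructing a new
# model instance re-validates the merged result.
merged = Band(**(fresh.model_dump() | existing.model_dump()))
print(merged.description)  # 'elevation above sea level'
```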