From bfb7baaaa5743d09e37c79c714c905e716a65283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Thu, 28 Sep 2023 17:26:51 -0600 Subject: [PATCH] fix(targets): Safely skip parsing record field as date-time if it is missing in schema (#1844) --- singer_sdk/sinks/core.py | 9 ++++--- tests/conftest.py | 2 ++ tests/core/sinks/test_validation.py | 41 +++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 tests/core/sinks/test_validation.py diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index 19dfbc31a..e578c2084 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -365,12 +365,15 @@ def _parse_timestamps_in_record( schema: TODO treatment: TODO """ - for key in record: + for key, value in record.items(): + if key not in schema["properties"]: + self.logger.warning("No schema for record field '%s'", key) + continue datelike_type = get_datelike_property_type(schema["properties"][key]) if datelike_type: - date_val = record[key] + date_val = value try: - if record[key] is not None: + if value is not None: date_val = parser.parse(date_val) except parser.ParserError as ex: date_val = handle_invalid_timestamp_in_record( diff --git a/tests/conftest.py b/tests/conftest.py index 7e7c39958..cb392bd3a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,7 @@ from singer_sdk import SQLConnector from singer_sdk import typing as th +from singer_sdk.helpers._typing import DatetimeErrorTreatmentEnum from singer_sdk.helpers.capabilities import PluginCapabilities from singer_sdk.sinks import BatchSink, SQLSink from singer_sdk.target_base import SQLTarget, Target @@ -75,6 +76,7 @@ class BatchSinkMock(BatchSink): """A mock Sink class.""" name = "batch-sink-mock" + datetime_error_treatment = DatetimeErrorTreatmentEnum.MAX def __init__( self, diff --git a/tests/core/sinks/test_validation.py b/tests/core/sinks/test_validation.py new file mode 100644 index 000000000..5a7ca39a3 --- /dev/null +++ b/tests/core/sinks/test_validation.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import datetime + +from tests.conftest import BatchSinkMock, TargetMock + + +def test_validate_record(): + target = TargetMock() + sink = BatchSinkMock( + target, + "users", + { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "created_at": {"type": "string", "format": "date-time"}, + "invalid_datetime": {"type": "string", "format": "date-time"}, + }, + }, + ["id"], + ) + + record = { + "id": 1, + "created_at": "2021-01-01T00:00:00+00:00", + "missing_datetime": "2021-01-01T00:00:00+00:00", + "invalid_datetime": "not a datetime", + } + updated_record = sink._validate_and_parse(record) + + assert updated_record["created_at"] == datetime.datetime( + 2021, + 1, + 1, + 0, + 0, + tzinfo=datetime.timezone.utc, + ) + assert updated_record["missing_datetime"] == "2021-01-01T00:00:00+00:00" + assert updated_record["invalid_datetime"] == "9999-12-31 23:59:59.999999"