Skip to content

Commit

Permalink
Merge branch 'main' into 1046-feat-faster-json-dumps-with-msgspec
Browse files Browse the repository at this point in the history
  • Loading branch information
edgarrmondragon committed Jan 27, 2024
2 parents 3913955 + f798f46 commit 60e591e
Show file tree
Hide file tree
Showing 10 changed files with 34 additions and 27 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
tests:
name: "Test on ${{ matrix.python-version }} (${{ matrix.session }}) / ${{ matrix.os }} / SQLAlchemy: ${{ matrix.sqlalchemy }}"
runs-on: ${{ matrix.os }}
continue-on-error: true
env:
NOXPYTHON: ${{ matrix.python-version }}
NOXSESSION: ${{ matrix.session }}
Expand Down
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ PyJWT = "~=2.4"
python-dateutil = ">=2.8.2"
python-dotenv = ">=0.20"
PyYAML = ">=6.0"
referencing = ">=0.30.0"
requests = ">=2.25.1"
simpleeval = ">=0.9.13"
simplejson = ">=3.17.6"
Expand Down
22 changes: 17 additions & 5 deletions singer_sdk/_singerlib/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
import typing as t
from dataclasses import dataclass

from jsonschema import RefResolver
from referencing import Registry
from referencing.jsonschema import DRAFT202012

if t.TYPE_CHECKING:
from referencing._core import Resolver

# These are keys defined in the JSON Schema spec that do not themselves contain
# schemas (or lists of schemas)
Expand Down Expand Up @@ -148,17 +152,25 @@ def resolve_schema_references(
A schema dict with all $refs replaced with the appropriate dict.
"""
refs = refs or {}
return _resolve_schema_references(schema, RefResolver("", schema, store=refs))
registry: Registry = Registry()
schema_resource = DRAFT202012.create_resource(schema)
registry = registry.with_resource("", schema_resource)
registry = registry.with_resources(
[(k, DRAFT202012.create_resource(v)) for k, v in refs.items()]
)

resolver = registry.resolver()
return _resolve_schema_references(schema, resolver)


def _resolve_schema_references(
schema: dict[str, t.Any],
resolver: RefResolver,
resolver: Resolver,
) -> dict[str, t.Any]:
if _SchemaKey.ref in schema:
reference_path = schema.pop(_SchemaKey.ref, None)
resolved = resolver.resolve(reference_path)[1]
schema.update(resolved)
resolved = resolver.lookup(reference_path)
schema.update(resolved.contents)
return _resolve_schema_references(schema, resolver)

if _SchemaKey.properties in schema:
Expand Down
9 changes: 1 addition & 8 deletions singer_sdk/helpers/_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
from singer_sdk.helpers._typing import is_object_type

if t.TYPE_CHECKING:
from logging import Logger

from singer_sdk._singerlib import Catalog, SelectionMask

_MAX_LRU_CACHE = 500
Expand All @@ -22,11 +20,10 @@ def get_selected_schema(
stream_name: str,
schema: dict,
mask: SelectionMask,
logger: Logger,
) -> dict:
"""Return a copy of the provided JSON schema, dropping any fields not selected."""
new_schema = deepcopy(schema)
_pop_deselected_schema(new_schema, mask, stream_name, (), logger)
_pop_deselected_schema(new_schema, mask, stream_name, ())
return new_schema


Expand All @@ -35,7 +32,6 @@ def _pop_deselected_schema(
mask: SelectionMask,
stream_name: str,
breadcrumb: tuple[str, ...],
logger: Logger,
) -> None:
"""Remove anything from schema that is not selected.
Expand Down Expand Up @@ -75,15 +71,13 @@ def _pop_deselected_schema(
mask,
stream_name,
property_breadcrumb,
logger,
)


def pop_deselected_record_properties(
record: dict[str, t.Any],
schema: dict,
mask: SelectionMask,
logger: Logger,
breadcrumb: tuple[str, ...] = (),
) -> None:
"""Remove anything from record properties that is not selected.
Expand All @@ -104,7 +98,6 @@ def pop_deselected_record_properties(
val,
schema,
mask,
logger,
property_breadcrumb,
)

Expand Down
1 change: 0 additions & 1 deletion singer_sdk/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,6 @@ def register_raw_streams_from_catalog(self, catalog: Catalog) -> None:
catalog_entry.stream or catalog_entry.tap_stream_id,
catalog_entry.schema.to_dict(),
catalog_entry.metadata.resolve_selection(),
self.logger,
),
catalog_entry.key_properties,
)
Expand Down
2 changes: 1 addition & 1 deletion singer_sdk/streams/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,7 @@ def _generate_record_messages(
Yields:
Record message objects.
"""
pop_deselected_record_properties(record, self.schema, self.mask, self.logger)
pop_deselected_record_properties(record, self.schema, self.mask)
record = conform_record_data_types(
stream_name=self.name,
record=record,
Expand Down
1 change: 0 additions & 1 deletion singer_sdk/streams/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ def get_selected_schema(self) -> dict:
stream_name=self.name,
schema=self.schema,
mask=self.mask,
logger=self.logger,
)

# Get records from stream
Expand Down
8 changes: 1 addition & 7 deletions tests/core/test_catalog_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,7 @@ def test_schema_selection(
stream_name: str,
):
"""Test that schema selection rules are correctly applied to SCHEMA messages."""
selected_schema = get_selected_schema(
stream_name,
schema,
mask,
logging.getLogger(),
)
selected_schema = get_selected_schema(stream_name, schema, mask)
assert (
selected_schema["properties"]
== PropertiesList(
Expand Down Expand Up @@ -247,7 +242,6 @@ def test_record_property_pop(
record=record_pop,
schema=schema,
mask=mask,
logger=logging.getLogger(),
breadcrumb=(),
)

Expand Down
3 changes: 0 additions & 3 deletions tests/samples/test_tap_countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import copy
import io
import json
import logging
import typing as t
from contextlib import redirect_stdout

Expand Down Expand Up @@ -89,15 +88,13 @@ def test_with_catalog_entry():
record=copied_record,
schema=stream.schema,
mask=stream.mask,
logger=logging.getLogger(),
)
assert copied_record == record

new_schema = get_selected_schema(
stream_name=stream.name,
schema=stream.schema,
mask=stream.metadata.resolve_selection(),
logger=logging.getLogger(),
)
assert new_schema == stream.schema

Expand Down

0 comments on commit 60e591e

Please sign in to comment.