Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/BuzzCutNorman/sdk into 2045…
Browse files Browse the repository at this point in the history
…-validate-records-with-fastjsonschema
  • Loading branch information
BuzzCutNorman committed Jan 29, 2024
2 parents fe43b36 + 6844477 commit f055237
Show file tree
Hide file tree
Showing 44 changed files with 299 additions and 249 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
tests:
name: "Test on ${{ matrix.python-version }} (${{ matrix.session }}) / ${{ matrix.os }} / SQLAlchemy: ${{ matrix.sqlalchemy }}"
runs-on: ${{ matrix.os }}
continue-on-error: true
env:
NOXPYTHON: ${{ matrix.python-version }}
NOXSESSION: ${{ matrix.session }}
Expand Down
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ repos:
rev: v4.5.0
hooks:
- id: check-json
exclude: |
(?x)^(
.*/launch.json
)$
- id: check-toml
exclude: |
(?x)^(
Expand Down
2 changes: 2 additions & 0 deletions cookiecutter/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Include template VSCode directory
!*/*/.vscode/
2 changes: 1 addition & 1 deletion cookiecutter/tap-template/hooks/post_gen_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@
shutil.rmtree(".github")

if "{{ cookiecutter.ide }}" != "VSCode":
shutil.rmtree(".vscode")
shutil.rmtree(".vscode", ignore_errors=True)
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ repos:
rev: v4.5.0
hooks:
- id: check-json
exclude: |
(?x)^(
\.vscode/.*\.json
)$
- id: check-toml
- id: check-yaml
- id: end-of-file-fixer
Expand All @@ -20,7 +24,7 @@ repos:
- id: check-github-workflows

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11
rev: v0.1.14
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --show-fixes]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "{{ cookiecutter.tap_id }}",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "{{ cookiecutter.library_name }}",
"justMyCode": false,
"args": [
"--config",
".secrets/config.json",
],
},
]
}
5 changes: 5 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,12 @@
"css/custom.css",
]

# -- Options for MyST --------------------------------------------------------
# https://myst-parser.readthedocs.io/en/latest/configuration.html
myst_heading_anchors = 3
myst_enable_extensions = {
"colon_fence",
}

redirects = {
"porting.html": "guides/porting.html",
Expand Down
4 changes: 4 additions & 0 deletions docs/dev_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ pipx install poetry
pipx install tox
```

:::{tip}
The minimum recommended version of cookiecutter is `2.2.0` (released 2023-07-06).
:::

Now you can initialize your new project with the Cookiecutter template for taps:

```bash
Expand Down
101 changes: 40 additions & 61 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ inflection = ">=0.5.1"
joblib = ">=1.0.1"
jsonpath-ng = ">=1.5.3"
jsonschema = ">=4.16.0"
memoization = { version = ">=0.3.2,<0.5.0", python = "<4" }
packaging = ">=23.1"
pendulum = ">=2.1.0,<4"
PyJWT = "~=2.4"
python-dateutil = ">=2.8.2"
python-dotenv = ">=0.20"
PyYAML = ">=6.0"
referencing = ">=0.30.0"
requests = ">=2.25.1"
simpleeval = ">=0.9.13"
simplejson = ">=3.17.6"
Expand Down Expand Up @@ -188,6 +188,7 @@ exclude_also = [
'''if (t\.)?TYPE_CHECKING:''',
]
fail_under = 82
show_missing = true

[tool.mypy]
exclude = "tests"
Expand Down
7 changes: 6 additions & 1 deletion singer_sdk/_singerlib/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,12 @@ def format_message(message: Message) -> str:
Returns:
The formatted message.
"""
return json.dumps(message.to_dict(), use_decimal=True, default=_default_encoding)
return json.dumps(
message.to_dict(),
use_decimal=True,
default=_default_encoding,
separators=(",", ":"),
)


def write_message(message: Message) -> None:
Expand Down
22 changes: 17 additions & 5 deletions singer_sdk/_singerlib/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
import typing as t
from dataclasses import dataclass

from jsonschema import RefResolver
from referencing import Registry
from referencing.jsonschema import DRAFT202012

if t.TYPE_CHECKING:
from referencing._core import Resolver

# These are keys defined in the JSON Schema spec that do not themselves contain
# schemas (or lists of schemas)
Expand Down Expand Up @@ -148,17 +152,25 @@ def resolve_schema_references(
A schema dict with all $refs replaced with the appropriate dict.
"""
refs = refs or {}
return _resolve_schema_references(schema, RefResolver("", schema, store=refs))
registry: Registry = Registry()
schema_resource = DRAFT202012.create_resource(schema)
registry = registry.with_resource("", schema_resource)
registry = registry.with_resources(
[(k, DRAFT202012.create_resource(v)) for k, v in refs.items()]
)

resolver = registry.resolver()
return _resolve_schema_references(schema, resolver)


def _resolve_schema_references(
schema: dict[str, t.Any],
resolver: RefResolver,
resolver: Resolver,
) -> dict[str, t.Any]:
if _SchemaKey.ref in schema:
reference_path = schema.pop(_SchemaKey.ref, None)
resolved = resolver.resolve(reference_path)[1]
schema.update(resolved)
resolved = resolver.lookup(reference_path)
schema.update(resolved.contents)
return _resolve_schema_references(schema, resolver)

if _SchemaKey.properties in schema:
Expand Down
35 changes: 35 additions & 0 deletions singer_sdk/about.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from collections import OrderedDict
from textwrap import dedent

from packaging.specifiers import SpecifierSet
from packaging.version import Version

if t.TYPE_CHECKING:
from singer_sdk.helpers.capabilities import CapabilitiesEnum

Expand All @@ -19,6 +22,38 @@
"MarkdownFormatter",
]

# Keep these in sync with the supported Python versions in pyproject.toml
_PY_MIN_VERSION = 8
_PY_MAX_VERSION = 12


def _get_min_version(specifiers: SpecifierSet) -> int:
    """Return the lowest Python 3 minor version implied by lower-bound clauses.

    Only ``>=`` and ``>`` clauses are inspected. The result is a coarse
    starting point for generating ``3.Y`` candidates; it is not a precise
    evaluation of the specifier set.

    Args:
        specifiers: The parsed ``Requires-Python`` specifier set.

    Returns:
        The smallest candidate minor version (the ``Y`` in ``3.Y``), or
        ``_PY_MIN_VERSION`` when the set has no lower-bound clause.
    """
    min_version: list[int] = []
    for specifier in specifiers:
        if specifier.operator not in (">=", ">"):
            continue

        version = Version(specifier.version)
        if version.major < 3:  # noqa: PLR2004
            # A bound in an older major series (e.g. ">=2.7") admits every
            # 3.x release; its minor component must not be read as a 3.x minor.
            min_version.append(0)
        elif specifier.operator == ">=":
            min_version.append(version.minor)
        else:
            # Exclusive bound: the first candidate is the next minor release.
            min_version.append(version.minor + 1)
    return min(min_version, default=_PY_MIN_VERSION)


def _get_max_version(specifiers: SpecifierSet) -> int:
    """Return the highest Python 3 minor version implied by upper-bound clauses.

    Only ``<=`` and ``<`` clauses are inspected. The result is a coarse end
    point for generating ``3.Y`` candidates; it is not a precise evaluation of
    the specifier set.

    Args:
        specifiers: The parsed ``Requires-Python`` specifier set.

    Returns:
        The largest candidate minor version (the ``Y`` in ``3.Y``), or
        ``_PY_MAX_VERSION`` when no clause caps the 3.x series.
    """
    max_version: list[int] = []
    for specifier in specifiers:
        if specifier.operator not in ("<=", "<"):
            continue

        version = Version(specifier.version)
        if version.major > 3:  # noqa: PLR2004
            # A bound such as "<4" does not cap the 3.x series. Reading its
            # minor component (0) would yield -1 and an empty candidate range.
            continue

        if specifier.operator == "<=" or any(version.release[2:]):
            # Inclusive bound, or an exclusive bound past the X.Y.0 release
            # (e.g. "<3.12.1"), which still admits "3.Y" itself.
            max_version.append(version.minor)
        else:
            # Exclusive bound exactly at X.Y: the last candidate is Y - 1.
            max_version.append(version.minor - 1)
    return max(max_version, default=_PY_MAX_VERSION)


def get_supported_pythons(requires_python: str) -> t.Generator[str, None, None]:
    """Yield the ``3.Y`` version strings accepted by a ``Requires-Python`` value.

    Candidate versions are generated between the bounds reported by
    ``_get_min_version`` and ``_get_max_version`` and then passed through
    ``SpecifierSet.filter``, so only versions satisfying every clause are
    produced.

    Args:
        requires_python: A version specifier string, e.g. ``">=3.8,<3.13"``.

    Yields:
        Version strings of the form ``"3.Y"``, in ascending order of ``Y``.
    """
    spec_set = SpecifierSet(requires_python)
    lowest = _get_min_version(spec_set)
    highest = _get_max_version(spec_set)

    candidates = (f"3.{minor}" for minor in range(lowest, highest + 1))
    for accepted in spec_set.filter(candidates):
        yield accepted


@dataclasses.dataclass
class AboutInfo:
Expand Down
19 changes: 6 additions & 13 deletions singer_sdk/helpers/_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,25 @@
import typing as t
from copy import deepcopy

from memoization import cached

from singer_sdk.helpers._typing import is_object_type

if t.TYPE_CHECKING:
from logging import Logger

from singer_sdk._singerlib import Catalog, SelectionMask

_MAX_LRU_CACHE = 500


@cached(max_size=_MAX_LRU_CACHE)
# TODO: this was previously cached using the `memoization` library. However, the
# `functools.lru_cache` decorator does not support non-hashable arguments.
# It is possible that this function is not a bottleneck, but if it is, we should
# consider implementing a custom LRU cache decorator that supports non-hashable
# arguments.
def get_selected_schema(
stream_name: str,
schema: dict,
mask: SelectionMask,
logger: Logger,
) -> dict:
"""Return a copy of the provided JSON schema, dropping any fields not selected."""
new_schema = deepcopy(schema)
_pop_deselected_schema(new_schema, mask, stream_name, (), logger)
_pop_deselected_schema(new_schema, mask, stream_name, ())
return new_schema


Expand All @@ -35,7 +32,6 @@ def _pop_deselected_schema(
mask: SelectionMask,
stream_name: str,
breadcrumb: tuple[str, ...],
logger: Logger,
) -> None:
"""Remove anything from schema that is not selected.
Expand Down Expand Up @@ -75,15 +71,13 @@ def _pop_deselected_schema(
mask,
stream_name,
property_breadcrumb,
logger,
)


def pop_deselected_record_properties(
record: dict[str, t.Any],
schema: dict,
mask: SelectionMask,
logger: Logger,
breadcrumb: tuple[str, ...] = (),
) -> None:
"""Remove anything from record properties that is not selected.
Expand All @@ -104,7 +98,6 @@ def pop_deselected_record_properties(
val,
schema,
mask,
logger,
property_breadcrumb,
)

Expand Down
4 changes: 2 additions & 2 deletions singer_sdk/helpers/jsonpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import logging
import typing as t
from functools import lru_cache

import memoization
from jsonpath_ng.ext import parse

if t.TYPE_CHECKING:
Expand Down Expand Up @@ -39,7 +39,7 @@ def extract_jsonpath(
yield match.value


@memoization.cached
@lru_cache
def _compile_jsonpath(expression: str) -> jsonpath_ng.JSONPath:
"""Parse a JSONPath expression and cache the result.
Expand Down
1 change: 0 additions & 1 deletion singer_sdk/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,6 @@ def register_raw_streams_from_catalog(self, catalog: Catalog) -> None:
catalog_entry.stream or catalog_entry.tap_stream_id,
catalog_entry.schema.to_dict(),
catalog_entry.metadata.resolve_selection(),
self.logger,
),
catalog_entry.key_properties,
)
Expand Down
32 changes: 1 addition & 31 deletions singer_sdk/plugin_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import click
from jsonschema import Draft7Validator
from packaging.specifiers import SpecifierSet

from singer_sdk import about, metrics
from singer_sdk.cli import plugin_cli
Expand All @@ -36,30 +35,6 @@
from singer_sdk.typing import extend_validator_with_defaults

SDK_PACKAGE_NAME = "singer_sdk"
CHECK_SUPPORTED_PYTHON_VERSIONS = (
# unsupported versions
"2.7",
"3.0",
"3.1",
"3.2",
"3.3",
"3.4",
"3.5",
"3.6",
"3.7",
# current supported versions
"3.8",
"3.9",
"3.10",
"3.11",
"3.12",
# future supported versions
"3.13",
"3.14",
"3.15",
"3.16",
)


JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator)

Expand Down Expand Up @@ -300,12 +275,7 @@ def _get_supported_python_versions(package: str) -> list[str] | None:
except metadata.PackageNotFoundError:
return None

reported_python_versions = SpecifierSet(package_metadata["Requires-Python"])
return [
version
for version in CHECK_SUPPORTED_PYTHON_VERSIONS
if version in reported_python_versions
]
return list(about.get_supported_pythons(package_metadata["Requires-Python"]))

@classmethod
def get_plugin_version(cls) -> str:
Expand Down
2 changes: 1 addition & 1 deletion singer_sdk/streams/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,7 @@ def _generate_record_messages(
Yields:
Record message objects.
"""
pop_deselected_record_properties(record, self.schema, self.mask, self.logger)
pop_deselected_record_properties(record, self.schema, self.mask)
record = conform_record_data_types(
stream_name=self.name,
record=record,
Expand Down
1 change: 0 additions & 1 deletion singer_sdk/streams/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ def get_selected_schema(self) -> dict:
stream_name=self.name,
schema=self.schema,
mask=self.mask,
logger=self.logger,
)

# Get records from stream
Expand Down
Loading

0 comments on commit f055237

Please sign in to comment.