From e1812b676d59406a1595bad7ac1fa112d1b6b586 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 07:02:52 -0600 Subject: [PATCH 001/137] chore(deps-dev): bump deptry from 0.19.0 to 0.19.1 in the development-dependencies group (#2600) chore(deps-dev): bump deptry in the development-dependencies group Bumps the development-dependencies group with 1 update: [deptry](https://github.com/fpgmaas/deptry). Updates `deptry` from 0.19.0 to 0.19.1 - [Release notes](https://github.com/fpgmaas/deptry/releases) - [Changelog](https://github.com/fpgmaas/deptry/blob/main/CHANGELOG.md) - [Commits](https://github.com/fpgmaas/deptry/compare/0.19.0...0.19.1) --- updated-dependencies: - dependency-name: deptry dependency-type: direct:development update-type: version-update:semver-patch dependency-group: development-dependencies ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/poetry.lock b/poetry.lock index a238790e17..b5e2b63bbe 100644 --- a/poetry.lock +++ b/poetry.lock @@ -520,22 +520,23 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "deptry" -version = "0.19.0" +version = "0.19.1" description = "A command line utility to check for unused, missing and transitive dependencies in a Python project." optional = false python-versions = ">=3.8" files = [ - {file = "deptry-0.19.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:074bfb613c1789e7489c735159356a8e7f260b0cf85193c6cc5887abcdabe5cc"}, - {file = "deptry-0.19.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:8fdfe2531fa773e5849b46d8d0ca851341aeeba3dc285b1a3f560a2a468676ba"}, - {file = "deptry-0.19.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248c34d78f83da111379a43e37c78d5e049661735e43ab5934307b2ba265431d"}, - {file = "deptry-0.19.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b18c45b52c2822fd1186e73e22274ceec85384ed0616d0454ae874724ac0846"}, - {file = "deptry-0.19.0-cp38-abi3-win_amd64.whl", hash = "sha256:79028cbc885ff8cd0a11fc0954bb0b552bf656fe6df73084df7014cbd902516c"}, - {file = "deptry-0.19.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:4fb158d0e4747e8dc21b2160f9a11540c01ede318cf558d702d485cb89119214"}, - {file = "deptry-0.19.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:bd80e70a2b306732d9c35019480a32452c00f74cbbe81b4a9791917fceeb2a0d"}, - {file = "deptry-0.19.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25f3b0e46b9936b3e9bb08aa9aa2f88a56d7d3bc14f0180844da4997670f21ab"}, - {file = "deptry-0.19.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbd13b8619d4fcfa0a06f752de5b92d552bdf71cfed41b2cf22cd9a16e28f4f2"}, - {file = "deptry-0.19.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fe61febc0ba979b20678d232daa4c6df31949f63d5dcaa5c363a5f1ffac9b887"}, - {file = "deptry-0.19.0.tar.gz", hash = "sha256:df5899d63a4e607bc9b2a091483b8e07ea98e021f2872defb1fd44573ae8c9a7"}, + {file = "deptry-0.19.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3a20ef0dd1c737fb05553d1b9c2fa9f185d0c9d3d881d255334cef401ffdc599"}, + {file = "deptry-0.19.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2c6b2df353e5113fd2f787c2f7e694657548d388929e988e8644bd178e19fc5c"}, + {file = "deptry-0.19.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a407bab3486e3844f93d702f1a381942873b2a46056c693b5634bbde219bb056"}, + {file = "deptry-0.19.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43f33789b97b47313609e92b62fabf8a71bba0d35a7476806da5d3d152e32345"}, + {file = "deptry-0.19.1-cp38-abi3-win_amd64.whl", hash = "sha256:0bad85a77b31360d0f52383b14783fdae4a201b597c0158fe10e91a779c67079"}, + {file = "deptry-0.19.1-cp38-abi3-win_arm64.whl", hash = "sha256:c59142d9dca8873325692fbb7aa1d2902fde87020dcc8102f75120ba95515172"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1abc119f9c8536b8ab1ee2122d4130665f33225d00d8615256ce354eb2c11ba"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7344c6cea032b549d86e156aa1e679fb94cd44deb7e93f25cb6d9c0ded5ea06f"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff7d8954265c48ea334fdd508339c51d3fba05e2d4a8be47712c69d1c8d35c94"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:023073247e5dac21254bf7b600ca2e2b71560652d2dfbe11535445ee912ca059"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:af8a0a9c42f8f92dfbc048e724fa89b9131f032f7e245812260560c214395abf"}, + {file = "deptry-0.19.1.tar.gz", hash = "sha256:1c12fea1d2301f42c7035c5636e4b9421457fde256fe7a241245662d20b4c841"}, ] [package.dependencies] From c3c23512aa374978d65b8d7ad8fcf6b58ccb9e7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 12:59:48 -0600 Subject: [PATCH 002/137] refactor: Improved SQL identifier (de)normalization (#2601) * refactor: WIP Improved SQL identifier (de)normalization * Add tests * Do not use db dialect to generate stream name * Fix types --- singer_sdk/connectors/sql.py | 170 ++++++++++++++++++++++--------- singer_sdk/sinks/sql.py | 9 +- singer_sdk/streams/sql.py | 3 +- tests/core/test_connector_sql.py | 34 +++++++ 4 files changed, 162 insertions(+), 54 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index f482226406..24f7be756e 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -5,6 +5,7 @@ import logging import typing as t import warnings +from collections import UserString from contextlib import contextmanager from datetime import datetime from functools import lru_cache @@ -22,6 +23,89 @@ from sqlalchemy.engine.reflection import Inspector +class FullyQualifiedName(UserString): + """A fully qualified table name. + + This class provides a simple way to represent a fully qualified table name + as a single object. The string representation of this object is the fully + qualified table name, with the parts separated by periods. + + The parts of the fully qualified table name are: + - database + - schema + - table + + The database and schema are optional. If only the table name is provided, + the string representation of the object will be the table name alone. + + Example: + ``` + table_name = FullyQualifiedName("my_table", "my_schema", "my_db") + print(table_name) # my_db.my_schema.my_table + ``` + """ + + def __init__( + self, + *, + table: str = "", + schema: str | None = None, + database: str | None = None, + delimiter: str = ".", + dialect: sa.engine.Dialect, + ) -> None: + """Initialize the fully qualified table name. + + Args: + table: The name of the table. + schema: The name of the schema. Defaults to None. + database: The name of the database. Defaults to None. + delimiter: The delimiter to use between parts. Defaults to '.'. + dialect: The SQLAlchemy dialect to use for quoting. + + Raises: + ValueError: If the fully qualified name could not be generated. + """ + self.table = table + self.schema = schema + self.database = database + self.delimiter = delimiter + self.dialect = dialect + + parts = [] + if self.database: + parts.append(self.prepare_part(self.database)) + if self.schema: + parts.append(self.prepare_part(self.schema)) + if self.table: + parts.append(self.prepare_part(self.table)) + + if not parts: + raise ValueError( + "Could not generate fully qualified name: " + + ":".join( + [ + self.database or "(unknown-db)", + self.schema or "(unknown-schema)", + self.table or "(unknown-table-name)", + ], + ), + ) + + super().__init__(self.delimiter.join(parts)) + + def prepare_part(self, part: str) -> str: + """Prepare a part of the fully qualified name. + + Args: + part: The part to prepare. + + Returns: + The prepared part. + """ + return self.dialect.identifier_preparer.quote(part) + + class SQLConnector: # noqa: PLR0904 """Base class for SQLAlchemy-based connectors. @@ -238,13 +322,13 @@ def to_sql_type(jsonschema_type: dict) -> sa.types.TypeEngine: """ return th.to_sql_type(jsonschema_type) - @staticmethod def get_fully_qualified_name( + self, table_name: str | None = None, schema_name: str | None = None, db_name: str | None = None, delimiter: str = ".", - ) -> str: + ) -> FullyQualifiedName: """Concatenates a fully qualified name from the parts. Args: @@ -253,34 +337,16 @@ def get_fully_qualified_name( db_name: The name of the database. Defaults to None. delimiter: Generally: '.' for SQL names and '-' for Singer names. - Raises: - ValueError: If all 3 name parts not supplied. - Returns: The fully qualified name as a string. """ - parts = [] - - if db_name: - parts.append(db_name) - if schema_name: - parts.append(schema_name) - if table_name: - parts.append(table_name) - - if not parts: - raise ValueError( - "Could not generate fully qualified name: " - + ":".join( - [ - db_name or "(unknown-db)", - schema_name or "(unknown-schema)", - table_name or "(unknown-table-name)", - ], - ), - ) - - return delimiter.join(parts) + return FullyQualifiedName( + table=table_name, # type: ignore[arg-type] + schema=schema_name, + database=db_name, + delimiter=delimiter, + dialect=self._dialect, + ) @property def _dialect(self) -> sa.engine.Dialect: @@ -429,12 +495,7 @@ def discover_catalog_entry( `CatalogEntry` object for the given table or a view """ # Initialize unique stream name - unique_stream_id = self.get_fully_qualified_name( - db_name=None, - schema_name=schema_name, - table_name=table_name, - delimiter="-", - ) + unique_stream_id = f"{schema_name}-{table_name}" # Detect key properties possible_primary_keys: list[list[str]] = [] @@ -528,7 +589,7 @@ def discover_catalog_entries(self) -> list[dict]: def parse_full_table_name( # noqa: PLR6301 self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, ) -> tuple[str | None, str | None, str]: """Parse a fully qualified table name into its parts. @@ -547,6 +608,13 @@ def parse_full_table_name( # noqa: PLR6301 A three part tuple (db_name, schema_name, table_name) with any unspecified or unused parts returned as None. """ + if isinstance(full_table_name, FullyQualifiedName): + return ( + full_table_name.database, + full_table_name.schema, + full_table_name.table, + ) + db_name: str | None = None schema_name: str | None = None @@ -560,7 +628,7 @@ def parse_full_table_name( # noqa: PLR6301 return db_name, schema_name, table_name - def table_exists(self, full_table_name: str) -> bool: + def table_exists(self, full_table_name: str | FullyQualifiedName) -> bool: """Determine if the target table already exists. Args: @@ -587,7 +655,7 @@ def schema_exists(self, schema_name: str) -> bool: def get_table_columns( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, column_names: list[str] | None = None, ) -> dict[str, sa.Column]: """Return a list of table columns. @@ -618,7 +686,7 @@ def get_table_columns( def get_table( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, column_names: list[str] | None = None, ) -> sa.Table: """Return a table object. @@ -643,7 +711,9 @@ def get_table( schema=schema_name, ) - def column_exists(self, full_table_name: str, column_name: str) -> bool: + def column_exists( + self, full_table_name: str | FullyQualifiedName, column_name: str + ) -> bool: """Determine if the target table already exists. Args: @@ -666,7 +736,7 @@ def create_schema(self, schema_name: str) -> None: def create_empty_table( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, schema: dict, primary_keys: t.Sequence[str] | None = None, partition_keys: list[str] | None = None, @@ -715,7 +785,7 @@ def create_empty_table( def _create_empty_column( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, column_name: str, sql_type: sa.types.TypeEngine, ) -> None: @@ -753,7 +823,7 @@ def prepare_schema(self, schema_name: str) -> None: def prepare_table( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, schema: dict, primary_keys: t.Sequence[str], partition_keys: list[str] | None = None, @@ -797,7 +867,7 @@ def prepare_table( def prepare_column( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, column_name: str, sql_type: sa.types.TypeEngine, ) -> None: @@ -822,7 +892,9 @@ def prepare_column( sql_type=sql_type, ) - def rename_column(self, full_table_name: str, old_name: str, new_name: str) -> None: + def rename_column( + self, full_table_name: str | FullyQualifiedName, old_name: str, new_name: str + ) -> None: """Rename the provided columns. Args: @@ -951,7 +1023,7 @@ def _get_type_sort_key( def _get_column_type( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, column_name: str, ) -> sa.types.TypeEngine: """Get the SQL type of the declared column. @@ -976,7 +1048,7 @@ def _get_column_type( def get_column_add_ddl( self, - table_name: str, + table_name: str | FullyQualifiedName, column_name: str, column_type: sa.types.TypeEngine, ) -> sa.DDL: @@ -1009,7 +1081,7 @@ def get_column_add_ddl( @staticmethod def get_column_rename_ddl( - table_name: str, + table_name: str | FullyQualifiedName, column_name: str, new_column_name: str, ) -> sa.DDL: @@ -1037,7 +1109,7 @@ def get_column_rename_ddl( @staticmethod def get_column_alter_ddl( - table_name: str, + table_name: str | FullyQualifiedName, column_name: str, column_type: sa.types.TypeEngine, ) -> sa.DDL: @@ -1096,7 +1168,7 @@ def update_collation( def _adapt_column_type( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, column_name: str, sql_type: sa.types.TypeEngine, ) -> None: @@ -1187,7 +1259,7 @@ def deserialize_json(self, json_str: str) -> object: # noqa: PLR6301 def delete_old_versions( self, *, - full_table_name: str, + full_table_name: str | FullyQualifiedName, version_column_name: str, current_version: int, ) -> None: diff --git a/singer_sdk/sinks/sql.py b/singer_sdk/sinks/sql.py index 33a7416145..0f7695ef00 100644 --- a/singer_sdk/sinks/sql.py +++ b/singer_sdk/sinks/sql.py @@ -21,6 +21,7 @@ if t.TYPE_CHECKING: from sqlalchemy.sql import Executable + from singer_sdk.connectors.sql import FullyQualifiedName from singer_sdk.target_base import Target _C = t.TypeVar("_C", bound=SQLConnector) @@ -109,7 +110,7 @@ def database_name(self) -> str | None: # Assumes single-DB target context. @property - def full_table_name(self) -> str: + def full_table_name(self) -> FullyQualifiedName: """Return the fully qualified table name. Returns: @@ -122,7 +123,7 @@ def full_table_name(self) -> str: ) @property - def full_schema_name(self) -> str: + def full_schema_name(self) -> FullyQualifiedName: """Return the fully qualified schema name. Returns: @@ -269,7 +270,7 @@ def process_batch(self, context: dict) -> None: def generate_insert_statement( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, schema: dict, ) -> str | Executable: """Generate an insert statement for the given records. @@ -297,7 +298,7 @@ def generate_insert_statement( def bulk_insert_records( self, - full_table_name: str, + full_table_name: str | FullyQualifiedName, schema: dict, records: t.Iterable[dict[str, t.Any]], ) -> int | None: diff --git a/singer_sdk/streams/sql.py b/singer_sdk/streams/sql.py index 9541598852..2877a505b0 100644 --- a/singer_sdk/streams/sql.py +++ b/singer_sdk/streams/sql.py @@ -14,6 +14,7 @@ from singer_sdk.streams.core import REPLICATION_INCREMENTAL, Stream if t.TYPE_CHECKING: + from singer_sdk.connectors.sql import FullyQualifiedName from singer_sdk.helpers.types import Context from singer_sdk.tap_base import Tap @@ -124,7 +125,7 @@ def primary_keys(self, new_value: t.Sequence[str]) -> None: self._singer_catalog_entry.metadata.root.table_key_properties = new_value @property - def fully_qualified_name(self) -> str: + def fully_qualified_name(self) -> FullyQualifiedName: """Generate the fully qualified version of the table name. Raises: diff --git a/tests/core/test_connector_sql.py b/tests/core/test_connector_sql.py index 10ee0c0f48..6a9a5e1893 100644 --- a/tests/core/test_connector_sql.py +++ b/tests/core/test_connector_sql.py @@ -7,9 +7,11 @@ import pytest import sqlalchemy as sa from sqlalchemy.dialects import registry, sqlite +from sqlalchemy.engine.default import DefaultDialect from samples.sample_duckdb import DuckDBConnector from singer_sdk.connectors import SQLConnector +from singer_sdk.connectors.sql import FullyQualifiedName from singer_sdk.exceptions import ConfigValidationError if t.TYPE_CHECKING: @@ -355,3 +357,35 @@ def create_engine(self) -> Engine: connector = CustomConnector(config={"sqlalchemy_url": "myrdbms:///"}) connector.create_engine() + + +def test_fully_qualified_name(): + dialect = DefaultDialect() + + fqn = FullyQualifiedName(table="my_table", dialect=dialect) + assert fqn == "my_table" + + fqn = FullyQualifiedName(schema="my_schema", table="my_table", dialect=dialect) + assert fqn == "my_schema.my_table" + + fqn = FullyQualifiedName( + database="my_catalog", + schema="my_schema", + table="my_table", + dialect=dialect, + ) + assert fqn == "my_catalog.my_schema.my_table" + + +def test_fully_qualified_name_with_quoting(): + dialect = DefaultDialect() + + fqn = FullyQualifiedName(table="order", schema="public", dialect=dialect) + assert fqn == 'public."order"' + + +def test_fully_qualified_name_empty_error(): + dialect = DefaultDialect() + + with pytest.raises(ValueError, match="Could not generate fully qualified name"): + FullyQualifiedName(dialect=dialect) From 477a5eb6879b9e133f1b43b821c26d590ab183bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:40:50 -0600 Subject: [PATCH 003/137] refactor: Backwards-compatible identifier quoting in fully qualified names (#2603) --- singer_sdk/connectors/sql.py | 10 +++------- tests/core/test_connector_sql.py | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index 24f7be756e..b6a74a976a 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -52,7 +52,6 @@ def __init__( schema: str | None = None, database: str | None = None, delimiter: str = ".", - dialect: sa.engine.Dialect, ) -> None: """Initialize the fully qualified table name. @@ -61,7 +60,6 @@ def __init__( schema: The name of the schema. Defaults to None. database: The name of the database. Defaults to None. delimiter: The delimiter to use between parts. Defaults to '.'. - dialect: The SQLAlchemy dialect to use for quoting. Raises: ValueError: If the fully qualified name could not be generated. @@ -70,7 +68,6 @@ def __init__( self.schema = schema self.database = database self.delimiter = delimiter - self.dialect = dialect parts = [] if self.database: @@ -94,7 +91,7 @@ def __init__( super().__init__(self.delimiter.join(parts)) - def prepare_part(self, part: str) -> str: + def prepare_part(self, part: str) -> str: # noqa: PLR6301 """Prepare a part of the fully qualified name. Args: @@ -103,7 +100,7 @@ def prepare_part(self, part: str) -> str: Returns: The prepared part. """ - return self.dialect.identifier_preparer.quote(part) + return part class SQLConnector: # noqa: PLR0904 @@ -322,8 +319,8 @@ def to_sql_type(jsonschema_type: dict) -> sa.types.TypeEngine: """ return th.to_sql_type(jsonschema_type) + @staticmethod def get_fully_qualified_name( - self, table_name: str | None = None, schema_name: str | None = None, db_name: str | None = None, @@ -345,7 +342,6 @@ def get_fully_qualified_name( schema=schema_name, database=db_name, delimiter=delimiter, - dialect=self._dialect, ) @property diff --git a/tests/core/test_connector_sql.py b/tests/core/test_connector_sql.py index 6a9a5e1893..c8390f33d7 100644 --- a/tests/core/test_connector_sql.py +++ b/tests/core/test_connector_sql.py @@ -360,32 +360,35 @@ def create_engine(self) -> Engine: def test_fully_qualified_name(): - dialect = DefaultDialect() - - fqn = FullyQualifiedName(table="my_table", dialect=dialect) + fqn = FullyQualifiedName(table="my_table") assert fqn == "my_table" - fqn = FullyQualifiedName(schema="my_schema", table="my_table", dialect=dialect) + fqn = FullyQualifiedName(schema="my_schema", table="my_table") assert fqn == "my_schema.my_table" fqn = FullyQualifiedName( database="my_catalog", schema="my_schema", table="my_table", - dialect=dialect, ) assert fqn == "my_catalog.my_schema.my_table" def test_fully_qualified_name_with_quoting(): + class QuotedFullyQualifiedName(FullyQualifiedName): + def __init__(self, *, dialect: sa.engine.Dialect, **kwargs: t.Any): + self.dialect = dialect + super().__init__(**kwargs) + + def prepare_part(self, part: str) -> str: + return self.dialect.identifier_preparer.quote(part) + dialect = DefaultDialect() - fqn = FullyQualifiedName(table="order", schema="public", dialect=dialect) + fqn = QuotedFullyQualifiedName(table="order", schema="public", dialect=dialect) assert fqn == 'public."order"' def test_fully_qualified_name_empty_error(): - dialect = DefaultDialect() - with pytest.raises(ValueError, match="Could not generate fully qualified name"): - FullyQualifiedName(dialect=dialect) + FullyQualifiedName() From be349ac7a588126ecc056192ddbb18aaf57869eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:30:05 -0600 Subject: [PATCH 004/137] feat(taps): Added a default user agent for REST and GraphQL taps (#2549) * feat: Added a default user agent * Remove redundant user-agent in sample * Use `cached_property` * Make backwards compatible and update cookiecutter --- .../{{cookiecutter.library_name}}/tap.py | 10 ++++++++++ .../tap_dummyjson/client.py | 4 ---- singer_sdk/streams/rest.py | 20 ++++++++++++++----- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py index df3f9f7546..74c8927e98 100644 --- a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py +++ b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py @@ -50,6 +50,16 @@ class Tap{{ cookiecutter.source_name }}({{ 'SQL' if cookiecutter.stream_type == default="https://api.mysample.com", description="The url for the API service", ), + {%- if cookiecutter.stream_type in ("GraphQL", "REST") %} + th.Property( + "user_agent", + th.StringType, + description=( + "A custom User-Agent header to send with each request. Default is " + "'/'" + ), + ), + {%- endif %} ).to_dict() {%- if cookiecutter.stream_type in ("GraphQL", "REST", "Other") %} diff --git a/samples/sample_tap_dummy_json/tap_dummyjson/client.py b/samples/sample_tap_dummy_json/tap_dummyjson/client.py index c946675f3a..3f0ba2a917 100644 --- a/samples/sample_tap_dummy_json/tap_dummyjson/client.py +++ b/samples/sample_tap_dummy_json/tap_dummyjson/client.py @@ -28,10 +28,6 @@ def authenticator(self): password=self.config["password"], ) - @property - def http_headers(self): - return {"User-Agent": "tap-dummyjson"} - def get_new_paginator(self): return BaseOffsetPaginator(start_value=0, page_size=PAGE_SIZE) diff --git a/singer_sdk/streams/rest.py b/singer_sdk/streams/rest.py index 559389e8c6..76e2abf5dd 100644 --- a/singer_sdk/streams/rest.py +++ b/singer_sdk/streams/rest.py @@ -6,6 +6,7 @@ import copy import logging import typing as t +from functools import cached_property from http import HTTPStatus from urllib.parse import urlparse from warnings import warn @@ -100,7 +101,7 @@ def __init__( super().__init__(name=name, schema=schema, tap=tap) if path: self.path = path - self._http_headers: dict = {} + self._http_headers: dict = {"User-Agent": self.user_agent} self._requests_session = requests.Session() self._compiled_jsonpath = None self._next_page_token_compiled_jsonpath = None @@ -150,6 +151,18 @@ def requests_session(self) -> requests.Session: self._requests_session = requests.Session() return self._requests_session + @cached_property + def user_agent(self) -> str: + """Get the user agent string for the stream. + + Returns: + The user agent string. + """ + return self.config.get( + "user_agent", + f"{self.tap_name}/{self._tap.plugin_version}", + ) + def validate_response(self, response: requests.Response) -> None: """Validate HTTP response. @@ -553,10 +566,7 @@ def http_headers(self) -> dict: Returns: Dictionary of HTTP headers to use as a base for every request. """ - result = self._http_headers - if "user_agent" in self.config: - result["User-Agent"] = self.config.get("user_agent") - return result + return self._http_headers @property def timeout(self) -> int: From 1333278007c8e4daf03f82049de102f041bda878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:36:37 -0600 Subject: [PATCH 005/137] feat(mappers): Stream map expressions now have access to the `Faker` class, rather than just a faker instance (#2598) * faker libary, rather than just faker instance, is now available to mapper expressions (provided a faker config is found). * added section on using faker for data masking, including how to use the faker library to re-seed * Update docs/stream_maps.md * Update docs/stream_maps.md --------- Co-authored-by: michael_calvo Co-authored-by: Michael Calvo --- docs/stream_maps.md | 42 ++++++++++++++++++++++++++++++++++++++++++ singer_sdk/mapper.py | 3 +++ 2 files changed, 45 insertions(+) diff --git a/docs/stream_maps.md b/docs/stream_maps.md index e348833cf9..9d4a7d8a3c 100644 --- a/docs/stream_maps.md +++ b/docs/stream_maps.md @@ -249,6 +249,8 @@ can be referenced directly by mapping expressions. - `fake` - a [`Faker`](inv:faker:std:doc#index) instance, configurable via `faker_config` (see previous example) - see the built-in [standard providers](inv:faker:std:doc#providers) for available methods +- `Faker` - the [`Faker`](inv:faker:std:doc#fakerclass) class. This was made available to enable consistent data + masking by allowing users to call `Faker.seed()`. ```{tip} The `fake` object is only available if the plugin specifies `faker` as an additional dependency (through the `singer-sdk` `faker` extra, or directly). @@ -435,6 +437,46 @@ stream_maps: ``` ```` +### Masking data with Faker + +It is best practice (or even a legal requirement) to mask PII/PHI in lower environments. Stream mappers have access to the `Faker` library, which can be used to generate random data in various forms/formats. + +```yaml +stream_maps: + customers: + # IMPORTANT: the `fake` variable name will only be available if faker_config is defined + first_name: fake.first_name() # generates a new random name each time +faker_config: + # set specific seed + seed: 0 + # set specific locales + locale: + - en_US + - en_GB +``` + +Be sure to checkout the [`faker` documentation](https://faker.readthedocs.io/en/master/) for all the fake data generation possibilities. + +Note that in the example above, `faker` will generate a new random value each time the `first_name()` function is invoked. This means if 3 records have a `first_name` value of `Mike`, then they will each have a different name after being mapped (for example, `Alistair`, `Debra`, `Scooby`). This can actually lead to issues when developing in the lower environments. + +Some users require consistent masking (for example, the first name `Mike` is always masked as `Debra`). Consistent masking preserves the relationship between tables and rows, while still hiding the real value. When a random mask is generated every time, relationships between tables/rows are effectively lost, making it impossible to test things like sql `JOIN`s. This can cause highly unpredictable behavior when running the same code in lower environments vs production. + +To generate consistent masked values, you must provide the **same seed each time** before invoking the faker function. + +```yaml +stream_maps: + customers: + # will always generate the same value for the same seed + first_name: Faker.seed(_['first_name']) or fake.first_name() +faker_config: + # IMPORTANT: `fake` and `Faker` names are only available if faker_config is defined. + locale: en_US +``` + +Remember, these expressions are evaluated by the [`simpleval`](https://github.com/danthedeckie/simpleeval) expression library, which only allows a single python expression (which is the reason for the `or` syntax above). + +This means if you require more advanced masking logic, which cannot be defined in a single python expression, you may need to consider a custom stream mapper. + ### Aliasing a stream using `__alias__` To alias a stream, simply add the operation `"__alias__": "new_name"` to the stream diff --git a/singer_sdk/mapper.py b/singer_sdk/mapper.py index fce1277fb4..a2e7bc9564 100644 --- a/singer_sdk/mapper.py +++ b/singer_sdk/mapper.py @@ -337,7 +337,10 @@ def _eval( names["config"] = self.map_config # Allow map config access within transform if self.fake: + from faker import Faker # noqa: PLC0415 + names["fake"] = self.fake + names["Faker"] = Faker if property_name and property_name in record: # Allow access to original property value if applicable From 64f9e64d26f94c1f06c15c9f3e36bcc4e51b5910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:45:35 -0600 Subject: [PATCH 006/137] feat(taps): A new `schema_is_valid` built-in tap test validates stream schemas against the JSON Schema specification (#2567) --- singer_sdk/plugin_base.py | 8 ++++--- singer_sdk/sinks/core.py | 7 +++++- singer_sdk/testing/suites.py | 2 ++ singer_sdk/testing/tap_tests.py | 34 +++++++++++++++++++++++---- singer_sdk/typing.py | 10 ++++---- tests/core/sinks/test_validation.py | 16 ++++++------- tests/core/test_jsonschema_helpers.py | 4 ++-- 7 files changed, 58 insertions(+), 23 deletions(-) diff --git a/singer_sdk/plugin_base.py b/singer_sdk/plugin_base.py index f709fed954..eb5b39de36 100644 --- a/singer_sdk/plugin_base.py +++ b/singer_sdk/plugin_base.py @@ -13,7 +13,6 @@ from types import MappingProxyType import click -from jsonschema import Draft7Validator from singer_sdk import about, metrics from singer_sdk.cli import plugin_cli @@ -32,11 +31,14 @@ PluginCapabilities, ) from singer_sdk.mapper import PluginMapper -from singer_sdk.typing import extend_validator_with_defaults +from singer_sdk.typing import ( + DEFAULT_JSONSCHEMA_VALIDATOR, + extend_validator_with_defaults, +) SDK_PACKAGE_NAME = "singer_sdk" -JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator) +JSONSchemaValidator = extend_validator_with_defaults(DEFAULT_JSONSCHEMA_VALIDATOR) class MapperNotInitialized(Exception): diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index 53533d58b3..e3c1ef5662 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -14,6 +14,7 @@ from types import MappingProxyType import jsonschema +import jsonschema.validators from typing_extensions import override from singer_sdk._singerlib.json import deserialize_json @@ -38,6 +39,7 @@ get_datelike_property_type, handle_invalid_timestamp_in_record, ) +from singer_sdk.typing import DEFAULT_JSONSCHEMA_VALIDATOR if t.TYPE_CHECKING: from logging import Logger @@ -88,7 +90,10 @@ def __init__( Raises: InvalidJSONSchema: If the schema provided from tap or mapper is invalid. """ - jsonschema_validator = jsonschema.Draft7Validator + jsonschema_validator = jsonschema.validators.validator_for( + schema, + DEFAULT_JSONSCHEMA_VALIDATOR, + ) super().__init__(schema) if validate_formats: diff --git a/singer_sdk/testing/suites.py b/singer_sdk/testing/suites.py index df93c86d27..fd53e6e19f 100644 --- a/singer_sdk/testing/suites.py +++ b/singer_sdk/testing/suites.py @@ -17,6 +17,7 @@ StreamRecordMatchesStreamSchema, StreamRecordSchemaMatchesCatalogTest, StreamReturnsRecordTest, + StreamSchemaIsValidTest, TapCLIPrintsTest, TapDiscoveryTest, TapStreamConnectionTest, @@ -72,6 +73,7 @@ class TestSuite(t.Generic[T]): StreamRecordMatchesStreamSchema, StreamRecordSchemaMatchesCatalogTest, StreamReturnsRecordTest, + StreamSchemaIsValidTest, StreamPrimaryKeysTest, ], ) diff --git a/singer_sdk/testing/tap_tests.py b/singer_sdk/testing/tap_tests.py index e5b0efc421..5839e0ceaf 100644 --- a/singer_sdk/testing/tap_tests.py +++ b/singer_sdk/testing/tap_tests.py @@ -5,11 +5,13 @@ import typing as t import warnings -from jsonschema import Draft7Validator +from jsonschema import validators +from jsonschema.exceptions import SchemaError import singer_sdk.helpers._typing as th from singer_sdk import Tap from singer_sdk.helpers._compat import datetime_fromisoformat +from singer_sdk.typing import DEFAULT_JSONSCHEMA_VALIDATOR from .templates import AttributeTestTemplate, StreamTestTemplate, TapTestTemplate @@ -71,6 +73,28 @@ def test(self) -> None: assert "progress_markers" not in final_state, self.message +class StreamSchemaIsValidTest(StreamTestTemplate): + """Test that a stream's schema is valid.""" + + name = "schema_is_valid" + + def test(self) -> None: + """Run test. + + Raises: + AssertionError: if schema is not valid. + """ + schema = self.stream.schema + default = DEFAULT_JSONSCHEMA_VALIDATOR + validator = validators.validator_for(schema, default=default) + + try: + validator.check_schema(schema) + except SchemaError as e: # pragma: no cover + msg = f"Schema is not valid: {e}" + raise AssertionError(msg) from e + + class StreamReturnsRecordTest(StreamTestTemplate): """Test that a stream sync returns at least 1 record.""" @@ -134,10 +158,10 @@ class StreamRecordMatchesStreamSchema(StreamTestTemplate): def test(self) -> None: """Run test.""" schema = self.stream.schema - validator = Draft7Validator( - schema, - format_checker=Draft7Validator.FORMAT_CHECKER, - ) + default = DEFAULT_JSONSCHEMA_VALIDATOR + validator = validators.validator_for(schema, default=default)(schema) + validator.format_checker = default.FORMAT_CHECKER + for record in self.stream_records: errors = list(validator.iter_errors(record)) error_messages = "\n".join( diff --git a/singer_sdk/typing.py b/singer_sdk/typing.py index a8ca332a44..6bf8d95278 100644 --- a/singer_sdk/typing.py +++ b/singer_sdk/typing.py @@ -58,9 +58,6 @@ import sqlalchemy as sa from jsonschema import ValidationError, validators -if t.TYPE_CHECKING: - from jsonschema.protocols import Validator - from singer_sdk.helpers._typing import ( JSONSCHEMA_ANNOTATION_SECRET, JSONSCHEMA_ANNOTATION_WRITEONLY, @@ -71,6 +68,8 @@ if t.TYPE_CHECKING: import sys + from jsonschema.protocols import Validator + if sys.version_info >= (3, 10): from typing import TypeAlias # noqa: ICN003 else: @@ -78,6 +77,7 @@ __all__ = [ + "DEFAULT_JSONSCHEMA_VALIDATOR", "ArrayType", "BooleanType", "CustomType", @@ -118,11 +118,13 @@ None, ] +DEFAULT_JSONSCHEMA_VALIDATOR: type[Validator] = validators.Draft7Validator # type: ignore[assignment] + T = t.TypeVar("T", bound=_JsonValue) P = t.TypeVar("P") -def extend_validator_with_defaults(validator_class): # noqa: ANN001, ANN201 +def extend_validator_with_defaults(validator_class: type[Validator]): # noqa: ANN201 """Fill in defaults, before validating with the provided JSON Schema Validator. See diff --git a/tests/core/sinks/test_validation.py b/tests/core/sinks/test_validation.py index c6a05ced1d..f8df7f7752 100644 --- a/tests/core/sinks/test_validation.py +++ b/tests/core/sinks/test_validation.py @@ -121,8 +121,8 @@ def test_validate_fastjsonschema(): @pytest.fixture -def draft7_sink_stop(): - """Return a sink object with Draft7 checks enabled.""" +def default_draft_sink_stop(): + """Return a sink object with the default draft checks enabled.""" class CustomSink(BatchSinkMock): """Custom sink class.""" @@ -147,8 +147,8 @@ class CustomSink(BatchSinkMock): @pytest.fixture -def draft7_sink_continue(): - """Return a sink object with Draft7 checks enabled.""" +def default_draft_sink_continue(): + """Return a sink object with the default draft checks enabled.""" class CustomSink(BatchSinkMock): """Custom sink class.""" @@ -174,9 +174,9 @@ class CustomSink(BatchSinkMock): def test_validate_record_jsonschema_format_checking_enabled_stop_on_error( - draft7_sink_stop, + default_draft_sink_stop, ): - sink: BatchSinkMock = draft7_sink_stop + sink: BatchSinkMock = default_draft_sink_stop record = { "id": 1, @@ -195,9 +195,9 @@ def test_validate_record_jsonschema_format_checking_enabled_stop_on_error( def test_validate_record_jsonschema_format_checking_enabled_continue_on_error( capsys: pytest.CaptureFixture, - draft7_sink_continue, + default_draft_sink_continue, ): - sink: BatchSinkMock = draft7_sink_continue + sink: BatchSinkMock = default_draft_sink_continue record = { "id": 1, diff --git a/tests/core/test_jsonschema_helpers.py b/tests/core/test_jsonschema_helpers.py index 15a63ec2c2..aeb2bae0b3 100644 --- a/tests/core/test_jsonschema_helpers.py +++ b/tests/core/test_jsonschema_helpers.py @@ -8,7 +8,6 @@ from textwrap import dedent import pytest -from jsonschema import Draft6Validator from singer_sdk.helpers._typing import ( JSONSCHEMA_ANNOTATION_SECRET, @@ -27,6 +26,7 @@ ) from singer_sdk.tap_base import Tap from singer_sdk.typing import ( + DEFAULT_JSONSCHEMA_VALIDATOR, AllOf, AnyType, ArrayType, @@ -932,7 +932,7 @@ def test_discriminated_union(): ), ) - validator = Draft6Validator(th.to_dict()) + validator = DEFAULT_JSONSCHEMA_VALIDATOR(th.to_dict()) assert validator.is_valid( { From b6fa56a273db03c4724d07e847ac257c49cc5155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:55:10 -0600 Subject: [PATCH 007/137] feat: Emit target metrics (#2486) * feat: Emit some target metrics * Remove unused Metric * Use PID tag in all metrics * Update tests * Update docs * Add batch processing timer * Add docs --- docs/implementation/metrics.md | 12 +++++++++--- singer_sdk/metrics.py | 9 ++++++++- singer_sdk/sinks/core.py | 27 +++++++++++++++++++++++++++ singer_sdk/target_base.py | 4 +++- tests/core/test_metrics.py | 14 ++++++++++++-- 5 files changed, 59 insertions(+), 7 deletions(-) diff --git a/docs/implementation/metrics.md b/docs/implementation/metrics.md index 87678cbe54..3b2a2a6a56 100644 --- a/docs/implementation/metrics.md +++ b/docs/implementation/metrics.md @@ -1,8 +1,14 @@ -# Tap Metrics +# Tap and Target Metrics Metrics logging is specified in the -[Singer Spec](https://hub.meltano.com/singer/spec#metrics). The SDK will automatically -emit metrics for `record_count`, `http_request_duration` and `sync_duration`. +[Singer Spec](https://hub.meltano.com/singer/spec#metrics). + +The SDK will automatically emit the following metrics: + +- `record_count`: The number of records processed by the tap or target. +- `http_request_duration`: The duration of HTTP requests made by the tap. +- `sync_duration`: The duration of the sync operation. +- `batch_processing_time`: The duration of processing a batch of records. ## Customization options diff --git a/singer_sdk/metrics.py b/singer_sdk/metrics.py index 50d7d3926f..fb97e693cb 100644 --- a/singer_sdk/metrics.py +++ b/singer_sdk/metrics.py @@ -47,6 +47,7 @@ class Tag(str, enum.Enum): JOB_TYPE = "job_type" HTTP_STATUS_CODE = "http_status_code" STATUS = "status" + PID = "pid" class Metric(str, enum.Enum): @@ -58,6 +59,7 @@ class Metric(str, enum.Enum): HTTP_REQUEST_COUNT = "http_request_count" JOB_DURATION = "job_duration" SYNC_DURATION = "sync_duration" + BATCH_PROCESSING_TIME = "batch_processing_time" @dataclass @@ -116,6 +118,7 @@ def __init__(self, metric: Metric, tags: dict | None = None) -> None: """ self.metric = metric self.tags = tags or {} + self.tags[Tag.PID] = os.getpid() self.logger = get_metrics_logger() @property @@ -182,6 +185,10 @@ def __init__( self.log_interval = log_interval self.last_log_time = time() + def exit(self) -> None: + """Exit the counter context.""" + self._pop() + def __enter__(self) -> Counter: """Enter the counter context. @@ -204,7 +211,7 @@ def __exit__( exc_val: The exception value. exc_tb: The exception traceback. """ - self._pop() + self.exit() def _pop(self) -> None: """Log and reset the counter.""" diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index e3c1ef5662..5a936a634f 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -17,6 +17,7 @@ import jsonschema.validators from typing_extensions import override +from singer_sdk import metrics from singer_sdk._singerlib.json import deserialize_json from singer_sdk.exceptions import ( InvalidJSONSchema, @@ -193,6 +194,31 @@ def __init__( ) self._validator: BaseJSONSchemaValidator | None = self.get_validator() + self._record_counter: metrics.Counter = metrics.record_counter(self.stream_name) + self._batch_timer = metrics.Timer( + metrics.Metric.BATCH_PROCESSING_TIME, + tags={ + metrics.Tag.STREAM: self.stream_name, + }, + ) + + @property + def record_counter_metric(self) -> metrics.Counter: + """Get the record counter for this sink. + + Returns: + The Meter instance for the record counter. + """ + return self._record_counter + + @property + def batch_processing_timer(self) -> metrics.Timer: + """Get the batch processing timer for this sink. + + Returns: + The Meter instance for the batch processing timer. + """ + return self._batch_timer @cached_property def validate_schema(self) -> bool: @@ -685,6 +711,7 @@ def clean_up(self) -> None: should not be relied on, it's recommended to use a uuid as well. """ self.logger.info("Cleaning up %s", self.stream_name) + self.record_counter_metric.exit() def process_batch_files( self, diff --git a/singer_sdk/target_base.py b/singer_sdk/target_base.py index 81c991a09a..22ad281763 100644 --- a/singer_sdk/target_base.py +++ b/singer_sdk/target_base.py @@ -359,6 +359,7 @@ def _process_record_message(self, message_dict: dict) -> None: sink.tally_record_read() sink.process_record(transformed_record, context) + sink.record_counter_metric.increment() sink._after_process_record(context) # noqa: SLF001 if sink.is_full: @@ -510,7 +511,8 @@ def drain_one(self, sink: Sink) -> None: # noqa: PLR6301 return draining_status = sink.start_drain() - sink.process_batch(draining_status) + with sink.batch_processing_timer: + sink.process_batch(draining_status) sink.mark_drained() def _drain_all(self, sink_list: list[Sink], parallelism: int) -> None: diff --git a/tests/core/test_metrics.py b/tests/core/test_metrics.py index c78969f864..b0de4c9bba 100644 --- a/tests/core/test_metrics.py +++ b/tests/core/test_metrics.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import os import pytest import time_machine @@ -18,6 +19,8 @@ def __str__(self) -> str: def test_meter(): + pid = os.getpid() + class _MyMeter(metrics.Meter): def __enter__(self): return self @@ -27,11 +30,14 @@ def __exit__(self, exc_type, exc_val, exc_tb): meter = _MyMeter(metrics.Metric.RECORD_COUNT) - assert meter.tags == {} + assert meter.tags == {metrics.Tag.PID: pid} stream_context = {"parent_id": 1} meter.context = stream_context - assert meter.tags == {metrics.Tag.CONTEXT: stream_context} + assert meter.tags == { + metrics.Tag.CONTEXT: stream_context, + metrics.Tag.PID: pid, + } meter.context = None assert metrics.Tag.CONTEXT not in meter.tags @@ -39,6 +45,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): def test_record_counter(caplog: pytest.LogCaptureFixture): caplog.set_level(logging.INFO, logger=metrics.METRICS_LOGGER_NAME) + pid = os.getpid() custom_object = CustomObject("test", 1) with metrics.record_counter( @@ -68,6 +75,7 @@ def test_record_counter(caplog: pytest.LogCaptureFixture): assert point.tags == { metrics.Tag.STREAM: "test_stream", metrics.Tag.ENDPOINT: "test_endpoint", + metrics.Tag.PID: pid, "custom_tag": "pytest", "custom_obj": custom_object, } @@ -79,6 +87,7 @@ def test_record_counter(caplog: pytest.LogCaptureFixture): def test_sync_timer(caplog: pytest.LogCaptureFixture): caplog.set_level(logging.INFO, logger=metrics.METRICS_LOGGER_NAME) + pid = os.getpid() traveler = time_machine.travel(0, tick=False) traveler.start() @@ -100,6 +109,7 @@ def test_sync_timer(caplog: pytest.LogCaptureFixture): assert point.tags == { metrics.Tag.STREAM: "test_stream", metrics.Tag.STATUS: "succeeded", + metrics.Tag.PID: pid, "custom_tag": "pytest", } From bbbc7a1a1f05108be40b39322315b973f8ab59e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:33:58 -0600 Subject: [PATCH 008/137] docs: Update project sample links (#2604) * docs: Update project sample links Related: - Closes https://github.com/meltano/sdk/issues/728 - Closes https://github.com/meltano/sdk/issues/1588 * Fix links * Fix hub link * Update docs/code_samples.md Co-authored-by: Will Da Silva --------- Co-authored-by: Will Da Silva --- docs/code_samples.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/code_samples.md b/docs/code_samples.md index 6d9efefd11..1783b30f8b 100644 --- a/docs/code_samples.md +++ b/docs/code_samples.md @@ -4,18 +4,18 @@ Below you will find a collection of code samples which can be used for inspirati ## Project Samples -Below are full project samples, contributed by members in the community. Use these for inspiration -or to get more information on what an SDK-based tap or target will look like. - -- [tap-bamboohr by Auto IDM](https://gitlab.com/autoidm/tap-bamboohr) -- [tap-confluence by @edgarrmondragon](https://github.com/edgarrmondragon/tap-confluence) -- [tap-investing by @DouweM](https://gitlab.com/DouweM/tap-investing) -- [tap-parquet by AJ](https://github.com/dataops-tk/tap-parquet) -- [tap-powerbi-metadata by Slalom](https://github.com/dataops-tk/tap-powerbi-metadata) -- [target-athena, Community Project led by Andrew Stewart](https://github.com/dataops-tk/target-athena) - -To add your project to this list, please -[submit an issue](https://github.com/meltano/sdk/issues/new). +The following are full project samples, contributed by members of the community: + +- A REST Stream: [MeltanoLabs/tap-pulumi-cloud](https://github.com/MeltanoLabs/tap-pulumi-cloud) +- A SQL Target: [MeltanoLabs/target-postgres](https://github.com/MeltanoLabs/target-postgres) +- A SQL Tap: [MeltanoLabs/tap-postgres](https://github.com/MeltanoLabs/tap-postgres) +- A Cloud Service: [MeltanoLabs/tap-cloudwatch](https://github.com/MeltanoLabs/tap-cloudwatch) +- A REST Stream with complex and varied auth options: [MeltanoLabs/tap-github](https://github.com/MeltanoLabs/tap-github) + +There are many more examples available: go to [Meltano Hub](https://hub.meltano.com) and type `sdk` in the searchbar to see a list of taps and targets created with the Singer SDK. + +To add your project to Meltano Hub, please +[submit an issue](https://github.com/meltano/hub/issues/new?assignees=edgarrmondragon%2Cpnadolny13&labels=valuestream%2FHub&projects=&template=new_plugin.yml&title=Add+Plugin%3A+%3Cinsert+plugin+name%3E). ## Reusable Code Snippets From 6b5a44aa7d1d0b628ddc039698230b4b790a92b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:34:15 -0600 Subject: [PATCH 009/137] ci: Let us create lightweight pre-release tags (#2581) --- .github/workflows/release.yml | 58 +++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 712f2e3dbf..cfb5ac75e1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,6 +5,7 @@ on: jobs: build: + name: Build artifacts runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -12,8 +13,44 @@ jobs: fetch-depth: 0 - uses: hynek/build-and-inspect-python-package@v2 + check-tag: + name: Check tag + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + outputs: + is_final: ${{ steps.check.outputs.is_final }} + steps: + - name: Check if tag is a pre-release + id: check + run: | + echo "is_final=$(echo '${{ github.ref }}' | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$' && echo 'true' || echo 'false')" >> $GITHUB_OUTPUT + + provenance: + name: Provenance + runs-on: ubuntu-latest + needs: [build] + if: startsWith(github.ref, 'refs/tags/') + permissions: + id-token: write # Needed for attestations + attestations: write # Needed for attestations + outputs: + bundle-path: ${{ steps.attest.outputs.bundle-path }} + steps: + - uses: actions/download-artifact@v4 + with: + name: Packages + path: dist + - uses: actions/attest-build-provenance@v1 + id: attest + with: + subject-path: "./dist/singer_sdk*" + - uses: actions/upload-artifact@v4 + with: + name: Attestations + path: ${{ steps.attest.outputs.bundle-path }} + publish: - name: Publish to PyPI + name: PyPI runs-on: ubuntu-latest needs: [build] environment: @@ -33,18 +70,22 @@ jobs: upload-to-release: name: Upload files to release runs-on: ubuntu-latest - needs: [build] - if: startsWith(github.ref, 'refs/tags/') + needs: [build, check-tag, provenance] + if: ${{ startsWith(github.ref, 'refs/tags/') && needs.check-tag.outputs.is_final == 'true' }} permissions: contents: write # Needed for uploading files to the release - id-token: write # Needed for attestations - attestations: write # Needed for attestations steps: - uses: actions/download-artifact@v4 with: name: Packages path: dist + + - uses: actions/download-artifact@v4 + with: + name: Attestations + path: attestations + - name: Upload wheel and sdist to release uses: svenstaro/upload-release-action@v2 with: @@ -52,14 +93,11 @@ jobs: tag: ${{ github.ref }} overwrite: true file_glob: true - - uses: actions/attest-build-provenance@v1 - id: attest - with: - subject-path: "./dist/singer_sdk*" + - name: Upload attestations to release uses: svenstaro/upload-release-action@v2 with: - file: ${{ steps.attest.outputs.bundle-path }} + file: attestations/attestation.jsonl tag: ${{ github.ref }} overwrite: true asset_name: attestations.intoto.jsonl From 3be5d97385a663f15b2900ac5ced1d879e5a3b44 Mon Sep 17 00:00:00 2001 From: Derek Visch Date: Fri, 16 Aug 2024 11:05:33 -0400 Subject: [PATCH 010/137] chore(templates): Remove "Source" from target cookiecutter template README (#2606) typo someone left source instead of target --- .../target-template/{{cookiecutter.target_id}}/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cookiecutter/target-template/{{cookiecutter.target_id}}/README.md b/cookiecutter/target-template/{{cookiecutter.target_id}}/README.md index 983be1ce5a..6733f0fc08 100644 --- a/cookiecutter/target-template/{{cookiecutter.target_id}}/README.md +++ b/cookiecutter/target-template/{{cookiecutter.target_id}}/README.md @@ -51,7 +51,7 @@ This Singer target will automatically import any environment variables within th `.env` if the `--config=ENV` is provided, such that config values will be considered if a matching environment variable is set either in the terminal context or in the `.env` file. -### Source Authentication and Authorization +### Authentication and Authorization