Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(taps): SQL taps now emit schemas with maxLength when applicable #2651

Merged
Merged 1 commit on Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions singer_sdk/connectors/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,14 @@ def float_to_jsonschema(self, column_type: sa.types.Numeric) -> dict: # noqa: A
return th.NumberType.type_dict # type: ignore[no-any-return]

@to_jsonschema.register
def string_to_jsonschema(self, column_type: sa.types.String) -> dict: # noqa: ARG002, PLR6301
def string_to_jsonschema(self, column_type: sa.types.String) -> dict: # noqa: PLR6301
"""Return a JSON Schema representation of a generic string type.

Args:
column_type (:column_type:`String`): The column type.
"""
# TODO: Enable support for maxLength.
# if sa_type.length:
# return StringType(max_length=sa_type.length).type_dict # noqa: ERA001
if column_type.length:
return th.StringType(max_length=column_type.length).type_dict # type: ignore[no-any-return]
return th.StringType.type_dict # type: ignore[no-any-return]

@to_jsonschema.register
Expand Down
2 changes: 0 additions & 2 deletions singer_sdk/streams/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,6 @@ def get_records(self, context: Context | None) -> t.Iterable[dict[str, t.Any]]:

with self.connector._connect() as conn: # noqa: SLF001
for record in conn.execute(query).mappings():
# TODO: Standardize record mapping type
# https://github.com/meltano/sdk/issues/2096
transformed_record = self.post_process(dict(record))
if transformed_record is None:
# Record filtered out during post_process()
Expand Down
1 change: 0 additions & 1 deletion tests/core/test_connector_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,6 @@ def test_fully_qualified_name_empty_error():
pytest.param(
sa.types.VARCHAR(length=127),
{"type": ["string"], "maxLength": 127},
marks=pytest.mark.xfail,
id="varchar-length",
),
pytest.param(sa.types.TEXT(), {"type": ["string"]}, id="text"),
Expand Down
5 changes: 3 additions & 2 deletions tests/samples/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ def _sqlite_sample_db(sqlite_connector):
f"""
CREATE TABLE t{t} (
c1 int PRIMARY KEY NOT NULL,
c2 varchar(10) NOT NULL
c2 varchar(10) NOT NULL,
c3 text NOT NULL
)
"""
),
)
for x in range(100):
conn.execute(
sa.text(f"INSERT INTO t{t} VALUES ({x}, 'x={x}')"), # noqa: S608
sa.text(f"INSERT INTO t{t} VALUES ({x}, 'x={x}', 'y={x}')"), # noqa: S608
)


Expand Down
14 changes: 9 additions & 5 deletions tests/samples/test_tap_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def test_sqlite_discovery(sqlite_sample_tap: SQLTap):
sqlite_sample_tap.sync_all()
stream = t.cast(SQLStream, sqlite_sample_tap.streams["main-t1"])
schema = stream.schema
assert len(schema["properties"]) == 2
assert len(schema["properties"]) == 3
assert stream.name == stream.tap_stream_id == "main-t1"

md_map = MetadataMapping.from_iterable(stream.catalog_entry["metadata"])
Expand All @@ -90,13 +90,17 @@ def test_sqlite_discovery(sqlite_sample_tap: SQLTap):
def test_sqlite_input_catalog(sqlite_sample_tap: SQLTap):
sqlite_sample_tap.sync_all()
stream = t.cast(SQLStream, sqlite_sample_tap.streams["main-t1"])
assert len(stream.schema["properties"]) == 2
assert len(stream.stream_maps[0].transformed_schema["properties"]) == 2
assert len(stream.schema["properties"]) == 3
assert len(stream.stream_maps[0].transformed_schema["properties"]) == 3

for schema in [stream.schema, stream.stream_maps[0].transformed_schema]:
assert len(schema["properties"]) == 2
assert len(schema["properties"]) == 3
assert schema["properties"]["c1"] == {"type": ["integer"]}
assert schema["properties"]["c2"] == {"type": ["string", "null"]}
assert schema["properties"]["c2"] == {
"type": ["string", "null"],
"maxLength": 10,
}
assert schema["properties"]["c3"] == {"type": ["string", "null"]}
assert stream.name == stream.tap_stream_id == "main-t1"

md_map = MetadataMapping.from_iterable(stream.catalog_entry["metadata"])
Expand Down
Loading