Skip to content

Commit

Permalink
feat(taps): SQL taps now emit schemas with maxLength when applicable
Browse files (browse the repository at this point in the history)
  • Loading branch information
edgarrmondragon committed Sep 6, 2024
1 parent 7ea1422 commit 39a0ed5
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 14 deletions.
7 changes: 3 additions & 4 deletions singer_sdk/connectors/sql.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -166,15 +166,14 @@ def float_to_jsonschema(self, column_type: sa.types.Numeric) -> dict: # noqa: A
return th.NumberType.type_dict # type: ignore[no-any-return]

@to_jsonschema.register
def string_to_jsonschema(self, column_type: sa.types.String) -> dict: # noqa: ARG002, PLR6301
def string_to_jsonschema(self, column_type: sa.types.String) -> dict: # noqa: PLR6301
"""Return a JSON Schema representation of a generic string type.
Args:
column_type (:column_type:`String`): The column type.
"""
# TODO: Enable support for maxLength.
# if sa_type.length:
# return StringType(max_length=sa_type.length).type_dict # noqa: ERA001
if column_type.length:
return th.StringType(max_length=column_type.length).type_dict
return th.StringType.type_dict # type: ignore[no-any-return]

@to_jsonschema.register
Expand Down
2 changes: 0 additions & 2 deletions singer_sdk/streams/sql.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -210,8 +210,6 @@ def get_records(self, context: Context | None) -> t.Iterable[dict[str, t.Any]]:

with self.connector._connect() as conn: # noqa: SLF001
for record in conn.execute(query).mappings():
# TODO: Standardize record mapping type
# https://github.com/meltano/sdk/issues/2096
transformed_record = self.post_process(dict(record))
if transformed_record is None:
# Record filtered out during post_process()
Expand Down
1 change: 0 additions & 1 deletion tests/core/test_connector_sql.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -405,7 +405,6 @@ def test_fully_qualified_name_empty_error():
pytest.param(
sa.types.VARCHAR(length=127),
{"type": ["string"], "maxLength": 127},
marks=pytest.mark.xfail,
id="varchar-length",
),
pytest.param(sa.types.TEXT(), {"type": ["string"]}, id="text"),
Expand Down
5 changes: 3 additions & 2 deletions tests/samples/conftest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,14 +29,15 @@ def _sqlite_sample_db(sqlite_connector):
f"""
CREATE TABLE t{t} (
c1 int PRIMARY KEY NOT NULL,
c2 varchar(10) NOT NULL
c2 varchar(10) NOT NULL,
c3 text NOT NULL
)
"""
),
)
for x in range(100):
conn.execute(
sa.text(f"INSERT INTO t{t} VALUES ({x}, 'x={x}')"), # noqa: S608
sa.text(f"INSERT INTO t{t} VALUES ({x}, 'x={x}', 'y={x}')"), # noqa: S608
)


Expand Down
14 changes: 9 additions & 5 deletions tests/samples/test_tap_sqlite.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -70,7 +70,7 @@ def test_sqlite_discovery(sqlite_sample_tap: SQLTap):
sqlite_sample_tap.sync_all()
stream = t.cast(SQLStream, sqlite_sample_tap.streams["main-t1"])
schema = stream.schema
assert len(schema["properties"]) == 2
assert len(schema["properties"]) == 3
assert stream.name == stream.tap_stream_id == "main-t1"

md_map = MetadataMapping.from_iterable(stream.catalog_entry["metadata"])
Expand All @@ -90,13 +90,17 @@ def test_sqlite_discovery(sqlite_sample_tap: SQLTap):
def test_sqlite_input_catalog(sqlite_sample_tap: SQLTap):
sqlite_sample_tap.sync_all()
stream = t.cast(SQLStream, sqlite_sample_tap.streams["main-t1"])
assert len(stream.schema["properties"]) == 2
assert len(stream.stream_maps[0].transformed_schema["properties"]) == 2
assert len(stream.schema["properties"]) == 3
assert len(stream.stream_maps[0].transformed_schema["properties"]) == 3

for schema in [stream.schema, stream.stream_maps[0].transformed_schema]:
assert len(schema["properties"]) == 2
assert len(schema["properties"]) == 3
assert schema["properties"]["c1"] == {"type": ["integer"]}
assert schema["properties"]["c2"] == {"type": ["string", "null"]}
assert schema["properties"]["c2"] == {
"type": ["string", "null"],
"maxLength": 10,
}
assert schema["properties"]["c3"] == {"type": ["string", "null"]}
assert stream.name == stream.tap_stream_id == "main-t1"

md_map = MetadataMapping.from_iterable(stream.catalog_entry["metadata"])
Expand Down

0 comments on commit 39a0ed5

Please sign in to comment.