diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 53fe313..0a7bc53 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.6.1
+current_version = 2.6.2
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b8364b..fdb1e82 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# tap-mssql 2.6.2 2024-10-09
+* Resolving issue when a table has a primary key and unique key. Both unique and primary key
+  columns were being identified as the primary key for the target table. Prioritising the
+  primary key first, and unique key secondary if there is no primary key.
+
 # tap-mssql 2.6.1 2024-10-09
 * Resolving issue with call get the prior LSN number (passing in unescaped table).
 
diff --git a/pyproject.toml b/pyproject.toml
index 2295f03..0173a77 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tap-mssql"
-version = "2.6.1"
+version = "2.6.2"
 description = "A pipelinewise compatible tap for connecting Microsoft SQL Server"
 authors = ["Rob Winters "]
 license = "GNU Affero"
diff --git a/tap_mssql/__init__.py b/tap_mssql/__init__.py
index c972c43..dee2818 100644
--- a/tap_mssql/__init__.py
+++ b/tap_mssql/__init__.py
@@ -230,18 +230,34 @@ def discover_catalog(mssql_conn, config):
                 table_info[db][table] = {"row_count": None, "is_view": table_type == "VIEW"}
             LOGGER.info("Tables fetched, fetching columns")
+            # Keep exactly one key constraint per table: 'PRIMARY KEY' sorts before
+            # 'UNIQUE', so a unique constraint is only used when no primary key
+            # exists; the constraint name breaks ties so the choice is repeatable.
             cur.execute(
-                """with constraint_columns as (
+                """ with table_constraints as (
+                select tc.TABLE_SCHEMA,
+                    tc.TABLE_NAME,
+                    tc.CONSTRAINT_NAME,
+                    tc.CONSTRAINT_TYPE,
+                    row_number() over (partition by tc.TABLE_SCHEMA, tc.TABLE_NAME
+                        order by tc.CONSTRAINT_TYPE, tc.CONSTRAINT_NAME) as row_number_rank
+
+                from INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
+                where tc.CONSTRAINT_TYPE in ('PRIMARY KEY', 'UNIQUE')
+                )
+                ,constraint_columns as (
                 select c.TABLE_SCHEMA
                     , c.TABLE_NAME
                     , c.COLUMN_NAME
+                    , c.CONSTRAINT_NAME
 
                 from INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE c
 
-                join INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
+                join table_constraints tc
                         on tc.TABLE_SCHEMA = c.TABLE_SCHEMA
                         and tc.TABLE_NAME = c.TABLE_NAME
                         and tc.CONSTRAINT_NAME = c.CONSTRAINT_NAME
-                        and tc.CONSTRAINT_TYPE in ('PRIMARY KEY', 'UNIQUE'))
+                        and tc.row_number_rank = 1
+                )
                 SELECT c.TABLE_SCHEMA,
                     c.TABLE_NAME,
                     c.COLUMN_NAME,
diff --git a/tests/test_tap_mssql.py b/tests/test_tap_mssql.py
index e8d1154..59c8aea 100755
--- a/tests/test_tap_mssql.py
+++ b/tests/test_tap_mssql.py
@@ -650,6 +650,69 @@ def test_do_not_discover_key_properties_for_view(self):
         self.assertEqual(primary_keys, {"a_table": ["id"], "a_view": []})
 
 
+class TestPrimaryKeyUniqueKey(unittest.TestCase):
+    """Key discovery must prefer a PRIMARY KEY over a UNIQUE constraint."""
+
+    def setUp(self):
+        """Recreate three tables: unique-only, primary-key-only, and both.
+
+        Drops are best-effort; a failed drop (e.g. first run) is ignored.
+        """
+        self.conn = test_utils.get_test_connection()
+
+        with connect_with_backoff(self.conn) as open_conn:
+            with open_conn.cursor() as cursor:
+                try:
+                    cursor.execute("drop table uc_only_table")
+                except Exception:
+                    pass
+                try:
+                    cursor.execute("drop table pk_only_table")
+                except Exception:
+                    pass
+                try:
+                    cursor.execute("drop table pk_uc_table")
+                except Exception:
+                    pass
+                cursor.execute(
+                    """
+                    CREATE TABLE uc_only_table (
+                        pk int,
+                        uc_1 int,
+                        uc_2 int,
+                    CONSTRAINT constraint_uc_only_table UNIQUE(uc_1,uc_2) )
+                """
+                )
+                cursor.execute(
+                    """
+                    CREATE TABLE pk_only_table (
+                        pk int PRIMARY KEY,
+                        uc_1 int,
+                        uc_2 int,
+                    )
+                """
+                )
+                cursor.execute(
+                    """
+                    CREATE TABLE pk_uc_table (
+                        pk int PRIMARY KEY,
+                        uc_1 int,
+                        uc_2 int,
+                    CONSTRAINT constraint_pk_uc_table UNIQUE(uc_1,uc_2) )
+                """
+                )
+
+    def test_only_primary_key(self):
+        """Unique columns are keys only when the table has no primary key."""
+        catalog = test_utils.discover_catalog(self.conn, {})
+        primary_keys = {}
+        for c in catalog.streams:
+            primary_keys[c.table] = (
+                singer.metadata.to_map(c.metadata).get((), {}).get("table-key-properties")
+            )
+
+        self.assertEqual(primary_keys["uc_only_table"], ["uc_1", "uc_2"])
+        self.assertEqual(primary_keys["pk_only_table"], ["pk"])
+        self.assertEqual(primary_keys["pk_uc_table"], ["pk"])
+
+
 if __name__ == "__main__":
     # test1 = TestBinlogReplication()