From 7cdc3fe44e9febdc0a14749635365a94f04aa3b8 Mon Sep 17 00:00:00 2001 From: Steve Clarke <84364906+s7clarke10@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:22:52 +1300 Subject: [PATCH 1/3] Picking only one table for the primary key --- tap_mssql/__init__.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tap_mssql/__init__.py b/tap_mssql/__init__.py index c972c43..dee2818 100644 --- a/tap_mssql/__init__.py +++ b/tap_mssql/__init__.py @@ -230,18 +230,31 @@ def discover_catalog(mssql_conn, config): table_info[db][table] = {"row_count": None, "is_view": table_type == "VIEW"} LOGGER.info("Tables fetched, fetching columns") cur.execute( - """with constraint_columns as ( + """ with table_constraints as ( + select tc.TABLE_SCHEMA, + tc.TABLE_NAME, + tc.CONSTRAINT_NAME, + tc.CONSTRAINT_TYPE, + row_number() over (partition by tc.TABLE_SCHEMA, tc.TABLE_NAME + order by tc.constraint_TYPE) as row_number_rank + + from INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc + where tc.CONSTRAINT_TYPE in ('PRIMARY KEY', 'UNIQUE') + ) + ,constraint_columns as ( select c.TABLE_SCHEMA , c.TABLE_NAME , c.COLUMN_NAME + , c.CONSTRAINT_NAME from INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE c - join INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc + join table_constraints tc on tc.TABLE_SCHEMA = c.TABLE_SCHEMA and tc.TABLE_NAME = c.TABLE_NAME and tc.CONSTRAINT_NAME = c.CONSTRAINT_NAME - and tc.CONSTRAINT_TYPE in ('PRIMARY KEY', 'UNIQUE')) + and tc.row_number_rank = 1 + ) SELECT c.TABLE_SCHEMA, c.TABLE_NAME, c.COLUMN_NAME, From 55307093b008e2f0d2a5ac1fb71cb170efdce1c4 Mon Sep 17 00:00:00 2001 From: Steve Clarke <84364906+s7clarke10@users.noreply.github.com> Date: Wed, 9 Oct 2024 18:19:17 +1300 Subject: [PATCH 2/3] Bumping version and updating change log --- .bumpversion.cfg | 2 +- CHANGELOG.md | 5 +++++ pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 53fe313..0a7bc53 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.6.1 +current_version = 2.6.2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b8364b..fdb1e82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# tap-mssql 2.6.2 2024-10-09 +* Resolving issue when a table has a primary key and unique key. Both unique and primary key + columns were being identified as the primary key for the target table. Prioritising the + primary key first, and unique key secondary if there is no primary key. + # tap-mssql 2.6.1 2024-10-09 * Resolving issue with call get the prior LSN number (passing in unescaped table). diff --git a/pyproject.toml b/pyproject.toml index 2295f03..0173a77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "tap-mssql" -version = "2.6.1" +version = "2.6.2" description = "A pipelinewise compatible tap for connecting Microsoft SQL Server" authors = ["Rob Winters "] license = "GNU Affero" From a05e78b16dc916d20d9a6039037cb01dd516a723 Mon Sep 17 00:00:00 2001 From: Steve Clarke <84364906+s7clarke10@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:45:22 +1300 Subject: [PATCH 3/3] Adding Primary / Unique key tests --- tests/test_tap_mssql.py | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_tap_mssql.py b/tests/test_tap_mssql.py index e8d1154..59c8aea 100755 --- a/tests/test_tap_mssql.py +++ b/tests/test_tap_mssql.py @@ -650,6 +650,63 @@ def test_do_not_discover_key_properties_for_view(self): self.assertEqual(primary_keys, {"a_table": ["id"], "a_view": []}) +class TestPrimaryKeyUniqueKey(unittest.TestCase): + def setUp(self): + self.conn = test_utils.get_test_connection() + + with connect_with_backoff(self.conn) as open_conn: + with open_conn.cursor() as cursor: + try: + cursor.execute("drop table uc_only_table") + except: + pass + try: + cursor.execute("drop table pk_only_table") + except: + pass + try: + cursor.execute("drop table pk_uc_table") + except: + pass + cursor.execute( + """ + CREATE TABLE uc_only_table ( + pk int, + uc_1 int, + uc_2 int, + CONSTRAINT constraint_uc_only_table UNIQUE(uc_1,uc_2) ) + """ + ) + cursor.execute( + """ + CREATE TABLE pk_only_table ( + pk int PRIMARY KEY, + uc_1 int, + uc_2 int, + ) + """ + ) + cursor.execute( + """ + CREATE TABLE pk_uc_table ( + pk int PRIMARY KEY, + uc_1 int, + uc_2 int, + CONSTRAINT constraint_pk_uc_table UNIQUE(uc_1,uc_2) ) + """ + ) + + def test_only_primary_key(self): + catalog = test_utils.discover_catalog(self.conn, {}) + primary_keys = {} + for c in catalog.streams: + primary_keys[c.table] = ( + singer.metadata.to_map(c.metadata).get((), {}).get("table-key-properties") + ) + + self.assertEqual(primary_keys["uc_only_table"], ["uc_1","uc_2"]) + self.assertEqual(primary_keys["pk_only_table"], ["pk"]) + self.assertEqual(primary_keys["pk_uc_table"], ["pk"]) if __name__ == "__main__": # test1 = TestBinlogReplication()