Skip to content

Commit

Permalink
Remove row counting when checking for resync
Browse files: browse the repository at this point in the history
  • Loading branch information
amofakhar committed Jul 25, 2024
1 parent f099e7f commit 0843b33
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 19 deletions.
8 changes: 1 addition & 7 deletions pipelinewise/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,19 @@ def get_tables_size(schema: str, tap) -> dict:
tap.open_connections()
result_list = tap.query(
'select TABLE_NAME as table_name,'
' TABLE_ROWS as table_rows,'
' (DATA_LENGTH + INDEX_LENGTH)/ 1024 / 1024 as table_size'
f' from information_schema.TABLES where TABLE_SCHEMA = \'{schema}\';')
tap.close_connections()
for res in result_list:
result.append({
'table_name': f'{schema}.{res["table_name"]}',
'table_rows': res['table_rows'],
'table_size': res['table_size']
})

if 'FastSyncTapPostgres' in str(type(tap)):
tap.open_connection()
result_list = tap.query(
'SELECT TABLE_NAME as table_name,'
' (xpath(\'/row/c/text()\','
' query_to_xml(format(\'select count(*) as c from %I.%I\', table_schema, TABLE_NAME), FALSE, TRUE, \'\'))'
')[1]::text::int AS table_rows,'
' pg_total_relation_size('
'\'"\'||table_schema||\'"."\'||table_name||\'"\')::NUMERIC/1024::NUMERIC/1024 as table_size '
'FROM (SELECT table_schema, TABLE_NAME FROM information_schema.tables '
Expand All @@ -56,8 +51,7 @@ def get_tables_size(schema: str, tap) -> dict:
for res in result_list:
result.append({
'table_name': f'{schema}.{res[0]}',
'table_rows': res[1],
'table_size': res[2]
'table_size': res[1]
})
tap.close_connection()
return result
Expand Down
24 changes: 12 additions & 12 deletions tests/units/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def fetchall(self):
"""Mock fetchall method"""
if self.type == 'mysql':
return [
{'table_name': 't1', 'table_rows': 1, 'table_size': 1234},
{'table_name': 't2', 'table_rows': 3, 'table_size': 1234}
{'table_name': 't1', 'table_size': 1234},
{'table_name': 't2', 'table_size': 1234}
]

if self.type == 'postgres':
return [
['t1', 1, 1234],
['t2', 3, 1234],
['t1', 1234],
['t2', 1234],
]
return None

Expand Down Expand Up @@ -119,8 +119,8 @@ def test_get_tables_size(self):
"""Test get_tables_size method works correctly"""
test_schema = 'foo_schema'
expected_tables_info = [
{'table_name': f'{test_schema}.t1', 'table_rows': 1, 'table_size': 1234},
{'table_name': f'{test_schema}.t2', 'table_rows': 3, 'table_size': 1234}
{'table_name': f'{test_schema}.t1', 'table_size': 1234},
{'table_name': f'{test_schema}.t2', 'table_size': 1234}
]

connection_config = {'host': 'foo_host',
Expand Down Expand Up @@ -158,15 +158,15 @@ def test_filter_out_selected_tables(self):
"""Test filter_out_selected_tables method works correctly"""
selected_tables = {'foo.t1', 'foo.t3', 'bar.t2'}
all_schema_tables = [
{'table_name': 'foo.t1', 'table_rows': 1, 'table_size': 1111},
{'table_name': 'foo.t2', 'table_rows': 2, 'table_size': 2222},
{'table_name': 'foo.t3', 'table_rows': 3, 'table_size': 3333},
{'table_name': 'foo.something t1 Uppercase', 'table_rows': 4, 'table_size': 1234}
{'table_name': 'foo.t1', 'table_size': 1111},
{'table_name': 'foo.t2', 'table_size': 2222},
{'table_name': 'foo.t3', 'table_size': 3333},
{'table_name': 'foo.something t1 Uppercase', 'table_size': 1234}
]

expected_output = [
{'table_name': 'foo.t1', 'table_rows': 1, 'table_size': 1111},
{'table_name': 'foo.t3', 'table_rows': 3, 'table_size': 3333},
{'table_name': 'foo.t1', 'table_size': 1111},
{'table_name': 'foo.t3', 'table_size': 3333},
]

actual_output = utils.filter_out_selected_tables(all_schema_tables, selected_tables)
Expand Down

0 comments on commit 0843b33

Please sign in to comment.