Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit c0375ac

Browse files
committed
black -l 120
1 parent 09a0593 commit c0375ac

File tree

4 files changed

+178
-179
lines changed

4 files changed

+178
-179
lines changed

data_diff/dbt.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ def dbt_diff(
7171
config = dbt_parser.get_datadiff_config()
7272
_initialize_events(dbt_parser.dbt_user_id, dbt_parser.dbt_version, dbt_parser.dbt_project_id)
7373

74-
7574
if not state and not (config.prod_database or config.prod_schema):
7675
doc_url = "https://docs.datafold.com/development_testing/open_source#configure-your-dbt-project"
7776
raise DataDiffDbtProjectVarsNotFoundError(
@@ -130,14 +129,17 @@ def dbt_diff(
130129
_local_diff(diff_vars, json_output)
131130
else:
132131
if json_output:
133-
print(json.dumps(
134-
jsonify_error(
135-
table1=diff_vars.prod_path,
136-
table2=diff_vars.dev_path,
137-
dbt_model=diff_vars.dbt_model,
138-
error="No primary key found. Add uniqueness tests, meta, or tags.",
139-
)
140-
), flush=True)
132+
print(
133+
json.dumps(
134+
jsonify_error(
135+
table1=diff_vars.prod_path,
136+
table2=diff_vars.dev_path,
137+
dbt_model=diff_vars.dbt_model,
138+
error="No primary key found. Add uniqueness tests, meta, or tags.",
139+
)
140+
),
141+
flush=True,
142+
)
141143
else:
142144
rich.print(
143145
_diff_output_base(".".join(diff_vars.dev_path), ".".join(diff_vars.prod_path))
@@ -189,7 +191,6 @@ def _get_diff_vars(
189191
)
190192

191193

192-
193194
def _get_prod_path_from_config(config, model, dev_database, dev_schema) -> Tuple[str, str]:
194195
# "custom" dbt config database
195196
if model.config.database:
@@ -233,12 +234,11 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
233234
dev_qualified_str = ".".join(diff_vars.dev_path)
234235
prod_qualified_str = ".".join(diff_vars.prod_path)
235236
diff_output_str = _diff_output_base(dev_qualified_str, prod_qualified_str)
236-
237+
237238
table1 = connect_to_table(
238239
diff_vars.connection, prod_qualified_str, tuple(diff_vars.primary_keys), diff_vars.threads
239240
)
240241
table2 = connect_to_table(diff_vars.connection, dev_qualified_str, tuple(diff_vars.primary_keys), diff_vars.threads)
241-
242242

243243
table1_columns = table1.get_schema()
244244
try:
@@ -294,23 +294,29 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
294294
try:
295295
list(diff)
296296
except Exception as e:
297-
print(json.dumps(jsonify_error(
298-
list(table1.table_path),
299-
list(table2.table_path),
300-
diff_vars.dbt_model,
301-
str(e)
302-
)), flush=True)
297+
print(
298+
json.dumps(
299+
jsonify_error(list(table1.table_path), list(table2.table_path), diff_vars.dbt_model, str(e))
300+
),
301+
flush=True,
302+
)
303303
return
304304

305-
print(json.dumps(
306-
jsonify(
307-
diff,
308-
dbt_model=diff_vars.dbt_model,
309-
with_summary=True, with_columns={
310-
"added": columns_added,
311-
"removed": columns_removed,
312-
"changed": columns_type_changed,
313-
})), flush=True)
305+
print(
306+
json.dumps(
307+
jsonify(
308+
diff,
309+
dbt_model=diff_vars.dbt_model,
310+
with_summary=True,
311+
with_columns={
312+
"added": columns_added,
313+
"removed": columns_removed,
314+
"changed": columns_type_changed,
315+
},
316+
)
317+
),
318+
flush=True,
319+
)
314320
return
315321

316322
if list(diff):
@@ -466,7 +472,7 @@ def _initialize_events(dbt_user_id: Optional[str], dbt_version: Optional[str], d
466472

467473

468474
def _email_signup() -> None:
469-
email_regex = r'^[\w\.\+-]+@[\w\.-]+\.\w+$'
475+
email_regex = r"^[\w\.\+-]+@[\w\.-]+\.\w+$"
470476
prompt = "\nWould you like to be notified when a new data-diff version is available?\n\nEnter email or leave blank to opt out (we'll only ask once).\n"
471477

472478
if bool_ask_for_email():

data_diff/format.py

Lines changed: 59 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from data_diff.diff_tables import DiffResultWrapper
66

77

8-
def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: str) -> 'FailedDiff':
8+
def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: str) -> "FailedDiff":
99
return FailedDiff(
1010
status="failed",
1111
model=dbt_model,
@@ -15,10 +15,12 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
1515
).json()
1616

1717

18-
def jsonify(diff: DiffResultWrapper,
19-
dbt_model: str,
20-
with_summary: bool = False,
21-
with_columns: Optional[Dict[str, List[str]]] = None) -> 'JsonDiff':
18+
def jsonify(
19+
diff: DiffResultWrapper,
20+
dbt_model: str,
21+
with_summary: bool = False,
22+
with_columns: Optional[Dict[str, List[str]]] = None,
23+
) -> "JsonDiff":
2224
"""
2325
Converts the diff result into a JSON-serializable format.
2426
Optionally add stats summary and schema diff.
@@ -35,7 +37,6 @@ def jsonify(diff: DiffResultWrapper,
3537

3638
t1_exclusive_rows, t2_exclusive_rows, diff_rows = _group_rows(diff_info, schema)
3739

38-
3940
diff_rows_jsonified = []
4041
for row in diff_rows:
4142
diff_rows_jsonified.append(_jsonify_diff(row, key_columns))
@@ -47,11 +48,11 @@ def jsonify(diff: DiffResultWrapper,
4748
t2_exclusive_rows_jsonified = []
4849
for row in t2_exclusive_rows:
4950
t2_exclusive_rows_jsonified.append(_jsonify_exclusive(row, key_columns))
50-
51+
5152
summary = None
5253
if with_summary:
5354
summary = _jsonify_diff_summary(diff.get_stats_dict())
54-
55+
5556
columns = None
5657
if with_columns:
5758
columns = _jsonify_columns_diff(with_columns, list(key_columns))
@@ -60,11 +61,8 @@ def jsonify(diff: DiffResultWrapper,
6061
t1_exclusive_rows
6162
or t2_exclusive_rows
6263
or diff_rows
63-
or with_columns and (
64-
with_columns['added']
65-
or with_columns['removed']
66-
or with_columns['changed']
67-
)
64+
or with_columns
65+
and (with_columns["added"] or with_columns["removed"] or with_columns["changed"])
6866
)
6967
return JsonDiff(
7068
status="success",
@@ -73,23 +71,20 @@ def jsonify(diff: DiffResultWrapper,
7371
dataset1=list(table1.table_path),
7472
dataset2=list(table2.table_path),
7573
rows=RowsDiff(
76-
exclusive=ExclusiveDiff(
77-
dataset1=t1_exclusive_rows_jsonified,
78-
dataset2=t2_exclusive_rows_jsonified
79-
),
74+
exclusive=ExclusiveDiff(dataset1=t1_exclusive_rows_jsonified, dataset2=t2_exclusive_rows_jsonified),
8075
diff=diff_rows_jsonified,
8176
),
8277
summary=summary,
8378
columns=columns,
8479
).json()
8580

8681

87-
8882
@dataclass
8983
class JsonExclusiveRowValue:
9084
"""
9185
Value of a single column in a row
9286
"""
87+
9388
isPK: bool
9489
value: Any
9590

@@ -99,6 +94,7 @@ class JsonDiffRowValue:
9994
"""
10095
Pair of diffed values for 2 rows with equal PKs
10196
"""
97+
10298
dataset1: Any
10399
dataset2: Any
104100
isDiff: bool
@@ -163,38 +159,40 @@ class RowsDiff:
163159

164160
@dataclass
165161
class FailedDiff:
166-
status: str # Literal ["failed"]
162+
status: str # Literal ["failed"]
167163
model: str
168164
dataset1: List[str]
169165
dataset2: List[str]
170166
error: str
171167

172-
version: str = '1.0.0'
168+
version: str = "1.0.0"
169+
173170

174171
@dataclass
175172
class JsonDiff:
176-
status: str # Literal ["success"]
177-
result: str # Literal ["different", "identical"]
173+
status: str # Literal ["success"]
174+
result: str # Literal ["different", "identical"]
178175
model: str
179176
dataset1: List[str]
180177
dataset2: List[str]
181178
rows: RowsDiff
182179
summary: Optional[JsonDiffSummary]
183180
columns: Optional[JsonColumnsSummary]
184181

185-
version: str = '1.0.0'
182+
version: str = "1.0.0"
186183

187184

188-
def _group_rows(diff_info: DiffResultWrapper,
189-
schema: List[str]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
185+
def _group_rows(
186+
diff_info: DiffResultWrapper, schema: List[str]
187+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
190188
t1_exclusive_rows = []
191189
t2_exclusive_rows = []
192190
diff_rows = []
193191

194192
for row in diff_info.diff:
195193
row_w_schema = dict(zip(schema, row))
196-
is_t1_exclusive = row_w_schema['is_exclusive_a']
197-
is_t2_exclusive = row_w_schema['is_exclusive_b']
194+
is_t1_exclusive = row_w_schema["is_exclusive_a"]
195+
is_t2_exclusive = row_w_schema["is_exclusive_b"]
198196

199197
if is_t1_exclusive:
200198
t1_exclusive_rows.append(row_w_schema)
@@ -204,83 +202,72 @@ def _group_rows(diff_info: DiffResultWrapper,
204202

205203
else:
206204
diff_rows.append(row_w_schema)
207-
205+
208206
return t1_exclusive_rows, t2_exclusive_rows, diff_rows
209207

210208

211209
def _jsonify_diff(row: Dict[str, Any], key_columns: List[str]) -> Dict[str, JsonDiffRowValue]:
212210
columns = collections.defaultdict(dict)
213211
for field, value in row.items():
214-
if field in ('is_exclusive_a', 'is_exclusive_b'):
212+
if field in ("is_exclusive_a", "is_exclusive_b"):
215213
continue
216214

217-
if field.startswith('is_diff_'):
218-
column_name = field.replace('is_diff_', '')
219-
columns[column_name]['isDiff'] = bool(value)
215+
if field.startswith("is_diff_"):
216+
column_name = field.replace("is_diff_", "")
217+
columns[column_name]["isDiff"] = bool(value)
220218

221-
elif field.endswith('_a'):
222-
column_name = field.replace('_a', '')
223-
columns[column_name]['dataset1'] = value
224-
columns[column_name]['isPK'] = column_name in key_columns
219+
elif field.endswith("_a"):
220+
column_name = field.replace("_a", "")
221+
columns[column_name]["dataset1"] = value
222+
columns[column_name]["isPK"] = column_name in key_columns
225223

226-
elif field.endswith('_b'):
227-
column_name = field.replace('_b', '')
228-
columns[column_name]['dataset2'] = value
229-
columns[column_name]['isPK'] = column_name in key_columns
230-
231-
return {
232-
column: JsonDiffRowValue(**data)
233-
for column, data in columns.items()
234-
}
224+
elif field.endswith("_b"):
225+
column_name = field.replace("_b", "")
226+
columns[column_name]["dataset2"] = value
227+
columns[column_name]["isPK"] = column_name in key_columns
228+
229+
return {column: JsonDiffRowValue(**data) for column, data in columns.items()}
235230

236231

237232
def _jsonify_exclusive(row: Dict[str, Any], key_columns: List[str]) -> Dict[str, JsonExclusiveRowValue]:
238233
columns = collections.defaultdict(dict)
239234
for field, value in row.items():
240-
if field in ('is_exclusive_a', 'is_exclusive_b'):
235+
if field in ("is_exclusive_a", "is_exclusive_b"):
241236
continue
242-
if field.startswith('is_diff_'):
237+
if field.startswith("is_diff_"):
243238
continue
244-
if field.endswith('_b') and row['is_exclusive_b']:
245-
column_name = field.replace('_b', '')
246-
columns[column_name]['isPK'] = column_name in key_columns
247-
columns[column_name]['value'] = value
248-
elif field.endswith('_a') and row['is_exclusive_a']:
249-
column_name = field.replace('_a', '')
250-
columns[column_name]['isPK'] = column_name in key_columns
251-
columns[column_name]['value'] = value
252-
return {
253-
column: JsonExclusiveRowValue(**data)
254-
for column, data in columns.items()
255-
}
239+
if field.endswith("_b") and row["is_exclusive_b"]:
240+
column_name = field.replace("_b", "")
241+
columns[column_name]["isPK"] = column_name in key_columns
242+
columns[column_name]["value"] = value
243+
elif field.endswith("_a") and row["is_exclusive_a"]:
244+
column_name = field.replace("_a", "")
245+
columns[column_name]["isPK"] = column_name in key_columns
246+
columns[column_name]["value"] = value
247+
return {column: JsonExclusiveRowValue(**data) for column, data in columns.items()}
256248

257249

258250
def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
259251
return JsonDiffSummary(
260252
rows=Rows(
261-
total=Total(
262-
dataset1=stats_dict["rows_A"],
263-
dataset2=stats_dict["rows_B"]
264-
),
253+
total=Total(dataset1=stats_dict["rows_A"], dataset2=stats_dict["rows_B"]),
265254
exclusive=ExclusiveRows(
266255
dataset1=stats_dict["exclusive_A"],
267256
dataset2=stats_dict["exclusive_B"],
268257
),
269258
updated=stats_dict["updated"],
270-
unchanged=stats_dict["unchanged"]
259+
unchanged=stats_dict["unchanged"],
271260
),
272-
stats=Stats(
273-
diffCounts=stats_dict["stats"]['diff_counts']
274-
)
261+
stats=Stats(diffCounts=stats_dict["stats"]["diff_counts"]),
275262
)
276263

277264

278265
def _jsonify_columns_diff(columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
279266
return JsonColumnsSummary(
280267
primaryKey=key_columns,
281-
exclusive= ExclusiveColumns(
282-
dataset2= list(columns_diff.get('added', [])),
283-
dataset1= list(columns_diff.get('removed', [])),
268+
exclusive=ExclusiveColumns(
269+
dataset2=list(columns_diff.get("added", [])),
270+
dataset1=list(columns_diff.get("removed", [])),
284271
),
285-
typeChanged=list(columns_diff.get('changed', [])),
286-
)
272+
typeChanged=list(columns_diff.get("changed", [])),
273+
)

data_diff/info_tree.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ def update_from_children(self, child_infos):
3030
# self.diff = list(chain(*[c.diff for c in child_infos]))
3131
self.diff_count = sum(c.diff_count for c in child_infos if c.diff_count is not None)
3232
self.is_diff = any(c.is_diff for c in child_infos)
33-
self.diff_schema = next((child.diff_schema for child in child_infos if child.diff_schema is not None),
34-
None)
33+
self.diff_schema = next((child.diff_schema for child in child_infos if child.diff_schema is not None), None)
3534
self.diff = sum((c.diff for c in child_infos if c.diff is not None), [])
3635

3736
self.rowcounts = {

0 commit comments

Comments
 (0)