From c34a83acd68b848c0400d5be5eb5d31973686af5 Mon Sep 17 00:00:00 2001 From: izzy Date: Thu, 7 Mar 2024 11:44:27 -0700 Subject: [PATCH 1/3] shows compiled sql for dbt models in the event logs --- hooli_data_eng/assets/dbt_assets.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hooli_data_eng/assets/dbt_assets.py b/hooli_data_eng/assets/dbt_assets.py index 5f877870..a74ef748 100644 --- a/hooli_data_eng/assets/dbt_assets.py +++ b/hooli_data_eng/assets/dbt_assets.py @@ -125,10 +125,19 @@ def _process_partitioned_dbt_assets(context: OpExecutionContext, dbt: DbtCliReso dbt_vars = {"min_date": str(first_partition), "max_date": str(last_partition)} dbt_args = ["build", "--vars", json.dumps(dbt_vars)] + # Invoke dbt CLI dbt_cli_task = dbt.cli(dbt_args, context=context) + # Emits an AssetObservation for each asset materialization, which is used to + # identify the Snowflake credit consumption yield from dbt_with_snowflake_insights(context, dbt_cli_task) + # fetch run_results.json to log compiled SQL + run_results_json = dbt_cli_task.get_artifact("run_results.json") + for result in run_results_json["results"]: + model_name = result.get("unique_id") + context.log.info(f"Compiled SQL for {model_name}:\n{result['compiled_code']}") + @dbt_assets( manifest=DBT_MANIFEST, From 10ee40a255248a0459f171ca01a700ae4d86b485 Mon Sep 17 00:00:00 2001 From: izzy Date: Thu, 7 Mar 2024 13:50:34 -0700 Subject: [PATCH 2/3] fixed typo and added locations to schema.yml --- .../models/CLEANED/locations_cleaned.sql | 2 +- dbt_project/models/CLEANED/schema.yml | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/dbt_project/models/CLEANED/locations_cleaned.sql b/dbt_project/models/CLEANED/locations_cleaned.sql index 78c7c72a..7c761d7d 100644 --- a/dbt_project/models/CLEANED/locations_cleaned.sql +++ b/dbt_project/models/CLEANED/locations_cleaned.sql @@ -8,7 +8,7 @@ from {{ source("RAW_DATA", "locations") }} source_renamed as ( select l_user_id as 
user_id, - l_street_address as streed_address, + l_street_address as street_address, l_state as state, l_country as country, l_zip_code as zip_code, diff --git a/dbt_project/models/CLEANED/schema.yml b/dbt_project/models/CLEANED/schema.yml index 706f13f5..164fb798 100644 --- a/dbt_project/models/CLEANED/schema.yml +++ b/dbt_project/models/CLEANED/schema.yml @@ -28,6 +28,7 @@ models: data_type: "float" tests: - greater_than_zero + - name: users_cleaned description: "Raw users data with test accounts removed" columns: @@ -40,3 +41,25 @@ models: - name: "created_at" description: "When the user account was crated" data_type: "timestamp" + + - name: locations_cleaned + description: "Locations data with standardized column names" + columns: + - name: "user_id" + description: "The unique identifier for the user" + data_type: "int" + - name: "street_address" + description: "The user's street address" + data_type: "str" + - name: "state" + description: "The state portion of the user's address" + data_type: "str" + - name: "country" + description: "The country portion of the user's address" + data_type: "str" + - name: "zip_code" + description: "The zip code portion of the user's address" + data_type: "str" + - name: "_sling_loaded_at" + description: "The time at which Sling loaded the row" + data_type: "int" From 864310cd776a02f8a7dcee2a90faeeceb44192c8 Mon Sep 17 00:00:00 2001 From: izzy Date: Fri, 8 Mar 2024 22:40:53 -0700 Subject: [PATCH 3/3] fixed slim CI bug --- hooli_data_eng/assets/dbt_assets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hooli_data_eng/assets/dbt_assets.py b/hooli_data_eng/assets/dbt_assets.py index a74ef748..d465e3f4 100644 --- a/hooli_data_eng/assets/dbt_assets.py +++ b/hooli_data_eng/assets/dbt_assets.py @@ -170,7 +170,9 @@ def weekly_dbt_assets(context: OpExecutionContext, dbt2: DbtCliResource): # This op will be used to run slim CI -@op +@op( + out={} +) def dbt_slim_ci(dbt2: DbtCliResource): slim_ci_manifest = SLIM_CI_MANIFEST if 
SLIM_CI_MANIFEST.exists() else DBT_MANIFEST.parent