From 58ad7b0ffb7640c02a09b77067b3eb9acd9a0436 Mon Sep 17 00:00:00 2001 From: Rex Ledesma Date: Tue, 2 Jul 2024 17:34:04 -0400 Subject: [PATCH] fix(dbt): use correct relation identifier when creating `BaseRelation` (#22823) ## Summary & Motivation When creating the `BaseRelation`, we want to use the name of the table that's created in the data warehouse so that we can properly query the relation. Previously, we were using `dbt_resource_props["name"]` as the identifier, which is the dbt's reference to that table. Instead, we should use the underlying identifier name. For dbt sources, this is `dbt_resource_props["identifier"]`. For dbt models and seeds, this is `dbt_resource_props["alias"]`. ## How I Tested These Changes pytest: added dbt aliases for source and model in existing `test_dagster_metadata` project --- .../dagster_dbt/core/resources_v2.py | 28 +++++++++++++------ .../test_dagster_metadata/dbt_project.yml | 3 ++ .../test_dagster_metadata/models/schema.yml | 3 ++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/core/resources_v2.py b/python_modules/libraries/dagster-dbt/dagster_dbt/core/resources_v2.py index 0b5747f7ee0de..00125729a9eb4 100644 --- a/python_modules/libraries/dagster-dbt/dagster_dbt/core/resources_v2.py +++ b/python_modules/libraries/dagster-dbt/dagster_dbt/core/resources_v2.py @@ -55,7 +55,7 @@ from dagster._model.pydantic_compat_layer import compat_model_validator from dagster._utils import pushd from dagster._utils.warnings import disable_dagster_warnings -from dbt.adapters.base.impl import BaseAdapter, BaseColumn +from dbt.adapters.base.impl import BaseAdapter, BaseColumn, BaseRelation from dbt.adapters.factory import get_adapter, register_adapter, reset_adapters from dbt.config import RuntimeConfig from dbt.config.runtime import load_profile, load_project @@ -774,6 +774,20 @@ class EventHistoryMetadata(NamedTuple): parents: Dict[str, Dict[str, Any]] +def _build_relation_from_dbt_resource_props( + adapter: BaseAdapter, dbt_resource_props: Dict[str, Any] +) -> BaseRelation: + return adapter.Relation.create( + database=dbt_resource_props["database"], + schema=dbt_resource_props["schema"], + identifier=( + dbt_resource_props["identifier"] + if dbt_resource_props["unique_id"].startswith("source") + else dbt_resource_props["alias"] + ), + ) + + def _build_column_lineage_metadata( event_history_metadata: EventHistoryMetadata, dbt_resource_props: Dict[str, Any], @@ -940,10 +954,8 @@ def _fetch_column_metadata( with adapter.connection_named(f"column_metadata_{dbt_resource_props['unique_id']}"): try: - relation = adapter.Relation.create( - database=dbt_resource_props["database"], - schema=dbt_resource_props["schema"], - identifier=dbt_resource_props["name"], + relation = _build_relation_from_dbt_resource_props( + adapter=adapter, dbt_resource_props=dbt_resource_props ) cols: List[BaseColumn] = adapter.get_columns_in_relation(relation=relation) except Exception as e: @@ -983,10 +995,8 @@ def _fetch_column_metadata( parent_unique_id ) or invocation.manifest["sources"].get(parent_unique_id) - parent_relation = adapter.Relation.create( - database=dbt_parent_resource_props["database"], - schema=dbt_parent_resource_props["schema"], - identifier=dbt_parent_resource_props["name"], + parent_relation = _build_relation_from_dbt_resource_props( + adapter=adapter, dbt_resource_props=dbt_parent_resource_props ) parent_columns: List[BaseColumn] = adapter.get_columns_in_relation( relation=parent_relation diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/dbt_project.yml b/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/dbt_project.yml index 80a3f384a6f84..e304a7d4b25fe 100644 --- a/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/dbt_project.yml +++ b/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/dbt_project.yml @@ -30,3 +30,6 @@ models: seeds: +post-hook: - "{{ dagster.log_column_level_metadata(enable_parent_relation_metadata_collection=var('dagster_enable_parent_relation_metadata_collection', 'true')) if env_var('DBT_LOG_COLUMN_METADATA', 'true') == 'true' else null }}" + test_dagster_metadata: + raw_orders: + +alias: aliased_raw_orders diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/models/schema.yml b/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/models/schema.yml index 05c5be6930f7f..cbd55672d346d 100644 --- a/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/models/schema.yml +++ b/python_modules/libraries/dagster-dbt/dagster_dbt_tests/dbt_projects/test_dagster_metadata/models/schema.yml @@ -32,6 +32,9 @@ models: - name: orders description: This table has basic information about orders, as well as some derived facts based on payments + config: + alias: aliased_orders + columns: - name: order_id tests: