From 8fe407c3e57f898dbbcb07eee39321fd41e34f81 Mon Sep 17 00:00:00 2001 From: Jacek Date: Mon, 6 Nov 2023 15:25:56 +0100 Subject: [PATCH 1/5] TRIVIAL: gooddata-dbt - split deploy_models to workspace provisioning, data source registration, and deployment of LDM Does not work correctly, because scan_pdm is missing in the case of LDM provisioning Intentionally do not fix it, because in the next step we deprecate PDM (will be removed from the platform very soon) --- gooddata-dbt/gooddata_dbt/args.py | 20 ++++++-- gooddata-dbt/gooddata_dbt/dbt_plugin.py | 68 ++++++++++++------------- 2 files changed, 47 insertions(+), 41 deletions(-) diff --git a/gooddata-dbt/gooddata_dbt/args.py b/gooddata-dbt/gooddata_dbt/args.py index 254def4a2..c76cd1503 100644 --- a/gooddata-dbt/gooddata_dbt/args.py +++ b/gooddata-dbt/gooddata_dbt/args.py @@ -157,11 +157,21 @@ def parse_arguments(description: str) -> argparse.Namespace: set_gooddata_upper_case_args(dbt_cloud_stats) dbt_cloud_stats.set_defaults(method="dbt_cloud_stats") - deploy_models = subparsers.add_parser("deploy_models") - set_dbt_args(deploy_models) - set_environment_id_arg(deploy_models) - set_gooddata_upper_case_args(deploy_models) - deploy_models.set_defaults(method="deploy_models") + provision_workspaces = subparsers.add_parser("provision_workspaces") + set_environment_id_arg(provision_workspaces) + provision_workspaces.set_defaults(method="provision_workspaces") + + register_data_sources = subparsers.add_parser("register_data_sources") + set_dbt_args(register_data_sources) + set_environment_id_arg(register_data_sources) + set_gooddata_upper_case_args(register_data_sources) + register_data_sources.set_defaults(method="register_data_sources") + + deploy_ldm = subparsers.add_parser("deploy_ldm") + set_dbt_args(deploy_ldm) + set_environment_id_arg(deploy_ldm) + set_gooddata_upper_case_args(deploy_ldm) + deploy_ldm.set_defaults(method="deploy_ldm") upload_notification = subparsers.add_parser("upload_notification") set_dbt_args(upload_notification) diff --git a/gooddata-dbt/gooddata_dbt/dbt_plugin.py b/gooddata-dbt/gooddata_dbt/dbt_plugin.py index 4298e1501..95f65246d 100644 --- a/gooddata-dbt/gooddata_dbt/dbt_plugin.py +++ b/gooddata-dbt/gooddata_dbt/dbt_plugin.py @@ -5,13 +5,13 @@ from argparse import Namespace from pathlib import Path from time import time -from typing import Dict, List, Optional +from typing import List, Optional import tabulate import yaml from gooddata_dbt.args import parse_arguments from gooddata_dbt.dbt.cloud import DbtConnection, DbtCredentials, DbtExecution -from gooddata_dbt.dbt.profiles import DbtOutput, DbtProfiles +from gooddata_dbt.dbt.profiles import DbtProfiles from gooddata_dbt.dbt.tables import DbtModelTables from gooddata_dbt.gooddata.config import GoodDataConfig, GoodDataConfigOrganization, GoodDataConfigProduct from gooddata_dbt.logger import get_logger @@ -54,22 +54,11 @@ def generate_and_put_ldm( # Construct GoodData LDM from dbt models declarative_datasets = dbt_tables.make_declarative_datasets(data_source_id, model_ids) ldm = CatalogDeclarativeModel.from_dict({"ldm": declarative_datasets}, camel_case=False) - + print(f"ldm={ldm.to_api()}") # Deploy logical into target workspace sdk.catalog_workspace_content.put_declarative_ldm(workspace_id, ldm) -def register_data_source( - logger: logging.Logger, sdk: GoodDataSdk, data_source_id: str, dbt_target: DbtOutput, dbt_tables: DbtModelTables -) -> None: - logger.info(f"Register data source {data_source_id=} schema={dbt_tables.schema_name}") - data_source = dbt_target.to_gooddata(data_source_id, dbt_tables.schema_name) - sdk.catalog_data_source.create_or_update_data_source(data_source) - - logger.info("Generate and put PDM") - generate_and_put_pdm(logger, sdk, data_source_id, dbt_tables) - - def create_workspace(logger: logging.Logger, sdk: GoodDataSdk, workspace_id: str, workspace_title: str) -> None: logger.info(f"Create workspace {workspace_id=} {workspace_title=}") # Create workspaces, if they do not exist yet, otherwise update them @@ -77,9 +66,6 @@ def create_workspace(logger: logging.Logger, sdk: GoodDataSdk, workspace_id: str sdk.catalog_workspace.create_or_update(workspace=workspace) -DATA_SOURCE_CONTAINER: Dict[str, DbtModelTables] = {} - - def deploy_ldm( logger: logging.Logger, args: Namespace, @@ -88,29 +74,38 @@ def deploy_ldm( model_ids: Optional[List[str]], workspace_id: str, ) -> None: - global DATA_SOURCE_CONTAINER logger.info("Generate and put LDM") dbt_profiles = DbtProfiles(args) - dbt_target = dbt_profiles.target data_source_id = dbt_profiles.data_source_id - # Parse dbt models only once and scan data source only once, not for each product/environment - dbt_tables = DATA_SOURCE_CONTAINER.get(data_source_id) - if dbt_tables is None: - logger.info(f"Process data source {data_source_id=}") - dbt_tables = DbtModelTables.from_local(args.gooddata_upper_case, all_model_ids) - if args.gooddata_upper_case: - dbt_target.schema = dbt_target.schema.upper() - dbt_target.database = dbt_target.database.upper() - register_data_source(logger, sdk_wrapper.sdk, data_source_id, dbt_target, dbt_tables) - DATA_SOURCE_CONTAINER[data_source_id] = dbt_tables - else: - logger.info(f"Data source already processed {data_source_id=} table_count={len(dbt_tables.tables)}") - + dbt_tables = DbtModelTables.from_local(args.gooddata_upper_case, all_model_ids) generate_and_put_ldm(sdk_wrapper.sdk, data_source_id, workspace_id, dbt_tables, model_ids) workspace_url = f"{sdk_wrapper.get_host_from_sdk()}/modeler/#/{workspace_id}" logger.info(f"LDM successfully loaded, verify here: {workspace_url}") +def register_data_source( + logger: logging.Logger, + args: Namespace, + all_model_ids: List[str], + sdk_wrapper: GoodDataSdkWrapper, +): + dbt_profiles = DbtProfiles(args) + dbt_target = dbt_profiles.target + data_source_id = dbt_profiles.data_source_id + logger.info(f"Process data source {data_source_id=}") + dbt_tables = DbtModelTables.from_local(args.gooddata_upper_case, all_model_ids) + if args.gooddata_upper_case: + dbt_target.schema = dbt_target.schema.upper() + dbt_target.database = dbt_target.database.upper() + + logger.info(f"Register data source {data_source_id=} schema={dbt_tables.schema_name}") + data_source = dbt_target.to_gooddata(data_source_id, dbt_tables.schema_name) + sdk_wrapper.sdk.catalog_data_source.create_or_update_data_source(data_source) + + logger.info("Generate and put PDM") + generate_and_put_pdm(logger, sdk_wrapper.sdk, data_source_id, dbt_tables) + + def upload_notification(logger: logging.Logger, sdk: GoodDataSdk, data_source_id: str) -> None: logger.info(f"Upload notification {data_source_id=}") sdk.catalog_data_source.register_upload_notification(data_source_id) @@ -302,6 +297,8 @@ def process_organization( dbt_profiles = DbtProfiles(args) # Caches are invalidated only per data source, not per data product upload_notification(logger, sdk_wrapper.sdk, dbt_profiles.data_source_id) + elif args.method == "register_data_sources": + register_data_source(logger, args, gd_config.all_model_ids, sdk_wrapper) else: if organization: data_products = [dp for dp in gd_config.data_products if dp.id in organization.data_product_ids] @@ -313,11 +310,10 @@ def process_organization( for environment in environments: if environment.id == args.gooddata_environment_id: workspace_id = f"{data_product.id}_{environment.id}" - if args.method == "deploy_models": - workspace_title = f"{data_product.name} ({environment.name})" - # TODO - provision workspaces in a separate args.method? - # We will need to extend it by provisioning of child workspaces, ... + workspace_title = f"{data_product.name} ({environment.name})" + if args.method == "provision_workspaces": create_workspace(logger, sdk_wrapper.sdk, workspace_id, workspace_title) + elif args.method == "deploy_ldm": deploy_ldm( logger, args, gd_config.all_model_ids, sdk_wrapper, data_product.model_ids, workspace_id ) From a2cbc3a256f2c02676b1e30cde8e3a40538fd875 Mon Sep 17 00:00:00 2001 From: Jacek Date: Tue, 7 Nov 2023 17:37:13 +0100 Subject: [PATCH 2/5] TRIVIAL: gooddata-dbt - remove PDM management It will be removed from GoodData soon. Physical properties (tables, columns) are injected into LDM instead. --- gooddata-dbt/gooddata_dbt/dbt/profiles.py | 3 + gooddata-dbt/gooddata_dbt/dbt/tables.py | 29 +--- gooddata-dbt/gooddata_dbt/dbt_plugin.py | 45 ++---- .../tests/resources/dbt_target/manifest.json | 153 ------------------ .../gooddata_layouts/pdm/aircraft.yaml | 15 ++ .../gooddata_layouts/pdm/aircraft_models.yaml | 16 ++ .../gooddata_layouts/pdm/airports.yaml | 31 ++++ .../pdm/ambient_temperature.yaml | 13 ++ .../gooddata_layouts/pdm/carriers.yaml | 16 ++ .../gooddata_layouts/pdm/census.yaml | 16 ++ .../pdm/census_by_country.yaml | 16 ++ .../gooddata_layouts/pdm/commits.yaml | 29 ++++ .../gooddata_layouts/pdm/customer.yaml | 37 +++++ .../gooddata_layouts/pdm/eshop_customers.yaml | 37 +++++ .../gooddata_layouts/pdm/flights.yaml | 58 +++++++ .../gooddata_layouts/pdm/home_rentals.yaml | 34 ++++ .../pdm/house_property_sales_time_series.yaml | 19 +++ .../gooddata_layouts/pdm/mall_customers.yaml | 22 +++ .../pdm/monthlyinventory.yaml | 30 ++++ .../gooddata_layouts/pdm/order_lines.yaml | 49 ++++++ .../gooddata_layouts/pdm/orders.yaml | 16 ++ .../gooddata_layouts/pdm/product.yaml | 37 +++++ .../gooddata_layouts/pdm/pull_requests.yaml | 47 ++++++ .../resources/gooddata_layouts/pdm/repos.yaml | 25 +++ .../gooddata_layouts/pdm/returns.yaml | 43 +++++ .../pdm/telco_customer_churn.yaml | 70 ++++++++ .../resources/gooddata_layouts/pdm/users.yaml | 22 +++ .../gooddata_layouts/pdm/workflow_runs.yaml | 33 ++++ gooddata-dbt/tests/test_tables.py | 33 +++- 29 files changed, 780 insertions(+), 214 deletions(-) create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft_models.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/airports.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/ambient_temperature.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/carriers.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/census.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/census_by_country.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/commits.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/customer.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/eshop_customers.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/flights.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/home_rentals.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/house_property_sales_time_series.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/mall_customers.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/monthlyinventory.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/order_lines.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/orders.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/product.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/pull_requests.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/repos.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/returns.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/telco_customer_churn.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/users.yaml create mode 100644 gooddata-dbt/tests/resources/gooddata_layouts/pdm/workflow_runs.yaml diff --git a/gooddata-dbt/gooddata_dbt/dbt/profiles.py b/gooddata-dbt/gooddata_dbt/dbt/profiles.py index 786c3ef61..c5697711c 100644 --- a/gooddata-dbt/gooddata_dbt/dbt/profiles.py +++ b/gooddata-dbt/gooddata_dbt/dbt/profiles.py @@ -180,6 +180,9 @@ def profile(self) -> DbtProfile: def target(self) -> DbtOutput: for output in self.profile.outputs: if output.name == self.args.target: + if self.args.gooddata_upper_case: + output.schema = output.schema.upper() + output.database = output.database.upper() return output raise ValueError(f"Target {self.args.target} not found in {self.profile.outputs}.") diff --git a/gooddata-dbt/gooddata_dbt/dbt/tables.py b/gooddata-dbt/gooddata_dbt/dbt/tables.py index 3cfe08611..e63529922 100644 --- a/gooddata-dbt/gooddata_dbt/dbt/tables.py +++ b/gooddata-dbt/gooddata_dbt/dbt/tables.py @@ -257,28 +257,6 @@ def get_scan_column(table: CatalogDeclarativeTable, column_name: str) -> Catalog scan_columns = [s.name for s in table.columns] raise Exception(f"get_scan_column table={table.id} column={column_name} not found in scan. {scan_columns=}") - def make_pdm(self, scan_pdm: CatalogDeclarativeTables) -> Dict: - self.set_data_types(scan_pdm) - tables = [] - for table in self.tables: - scan_table = self.get_scan_table(scan_pdm, table.name) - columns = [] - for column in table.columns.values(): - # dbt does not propagate data types to manifest (not yet?) - scan_column = self.get_scan_column(scan_table, column.name) - column.data_type = column.data_type or scan_column.data_type - - columns.append({"name": column.name, "data_type": column.data_type}) - tables.append( - { - "id": table.name, - "path": [self.schema_name, table.name], - "type": "TABLE", - "columns": columns, - } - ) - return {"tables": tables} - @staticmethod def get_ldm_title(column: DbtModelColumn) -> str: return column.description or column.name @@ -351,6 +329,7 @@ def make_references(self, table: DbtModelTable, role_playing_tables: Dict) -> Li "identifier": {"id": referenced_object_id, "type": "dataset"}, "multivalue": False, "source_columns": [column.name], + "source_column_data_types": [column.data_type], } ) return references @@ -367,6 +346,7 @@ def make_facts(table: DbtModelTable) -> List[Dict]: "title": column.gooddata_ldm_title, "description": column.gooddata_ldm_description, "source_column": column.name, + "source_column_data_type": column.data_type, "tags": [table.gooddata_ldm_title] + column.tags, } ) @@ -383,6 +363,7 @@ def make_labels(table: DbtModelTable, attribute_column: DbtModelColumn) -> List[ "title": column.gooddata_ldm_title, "description": column.gooddata_ldm_description, "source_column": column.name, + "source_column_data_type": column.data_type, "value_type": column.meta.gooddata.label_type, "tags": [table.gooddata_ldm_title] + column.tags, } @@ -400,6 +381,7 @@ def make_attributes(self, table: DbtModelTable) -> List[Dict]: "title": column.gooddata_ldm_title, "description": column.gooddata_ldm_description, "source_column": column.name, + "source_column_data_type": column.data_type, "tags": [table.gooddata_ldm_title] + column.tags, "labels": self.make_labels(table, column), } @@ -444,7 +426,8 @@ def make_dataset(self, data_source_id: str, table: DbtModelTable, role_playing_t "tags": [table.gooddata_ldm_title] + table.tags, "data_source_table_id": { "data_source_id": data_source_id, - "id": table.name, # TODO - may not be unique + "id": f"{self.schema_name}__{table.name}", + "path": [self.schema_name, table.name], "type": "dataSource", }, "grain": grain, diff --git a/gooddata-dbt/gooddata_dbt/dbt_plugin.py b/gooddata-dbt/gooddata_dbt/dbt_plugin.py index 95f65246d..905a6962a 100644 --- a/gooddata-dbt/gooddata_dbt/dbt_plugin.py +++ b/gooddata-dbt/gooddata_dbt/dbt_plugin.py @@ -18,13 +18,9 @@ from gooddata_dbt.sdk_wrapper import GoodDataSdkWrapper from gooddata_dbt.utils import report_message_to_merge_request -from gooddata_sdk import ( - CatalogDeclarativeModel, - CatalogDeclarativeTables, - CatalogScanModelRequest, - CatalogWorkspace, - GoodDataSdk, -) +from gooddata_sdk import CatalogDeclarativeModel, CatalogScanModelRequest, CatalogWorkspace, GoodDataSdk + +# TODO - upgrade AIO, cleanup, start from scratch, test everything GOODDATA_LAYOUTS_DIR = Path("gooddata_layouts") @@ -33,28 +29,23 @@ def layout_model_path(data_product: GoodDataConfigProduct) -> Path: return GOODDATA_LAYOUTS_DIR / data_product.id -def generate_and_put_pdm( - logger: logging.Logger, sdk: GoodDataSdk, data_source_id: str, dbt_tables: DbtModelTables +def generate_and_put_ldm( + logger: logging.Logger, + sdk: GoodDataSdk, + data_source_id: str, + workspace_id: str, + dbt_tables: DbtModelTables, + model_ids: Optional[List[str]], ) -> None: - # Construct GoodData PDM from dbt models and put it to the server - # GoodData caches the metadata to reduce querying them (costly) in runtime. scan_request = CatalogScanModelRequest(scan_tables=True, scan_views=True) logger.info(f"Scan data source {data_source_id=}") scan_pdm = sdk.catalog_data_source.scan_data_source(data_source_id, scan_request, report_warnings=True).pdm - - logger.info(f"Generate and put PDM {data_source_id=}") - pdm = dbt_tables.make_pdm(scan_pdm) - declarative_tables = CatalogDeclarativeTables.from_dict(pdm, camel_case=False) - sdk.catalog_data_source.put_declarative_pdm(data_source_id, declarative_tables) - - -def generate_and_put_ldm( - sdk: GoodDataSdk, data_source_id: str, workspace_id: str, dbt_tables: DbtModelTables, model_ids: Optional[List[str]] -) -> None: + scan_pdm.store_to_disk(Path("test")) + # Store data types to dbt_tables class. It is used in make_declarative_datasets to inject data types to LDM. + dbt_tables.set_data_types(scan_pdm) # Construct GoodData LDM from dbt models declarative_datasets = dbt_tables.make_declarative_datasets(data_source_id, model_ids) ldm = CatalogDeclarativeModel.from_dict({"ldm": declarative_datasets}, camel_case=False) - print(f"ldm={ldm.to_api()}") # Deploy logical into target workspace sdk.catalog_workspace_content.put_declarative_ldm(workspace_id, ldm) @@ -78,7 +69,7 @@ def deploy_ldm( dbt_profiles = DbtProfiles(args) data_source_id = dbt_profiles.data_source_id dbt_tables = DbtModelTables.from_local(args.gooddata_upper_case, all_model_ids) - generate_and_put_ldm(sdk_wrapper.sdk, data_source_id, workspace_id, dbt_tables, model_ids) + generate_and_put_ldm(logger, sdk_wrapper.sdk, data_source_id, workspace_id, dbt_tables, model_ids) workspace_url = f"{sdk_wrapper.get_host_from_sdk()}/modeler/#/{workspace_id}" logger.info(f"LDM successfully loaded, verify here: {workspace_url}") @@ -88,23 +79,17 @@ def register_data_source( args: Namespace, all_model_ids: List[str], sdk_wrapper: GoodDataSdkWrapper, -): +) -> None: dbt_profiles = DbtProfiles(args) dbt_target = dbt_profiles.target data_source_id = dbt_profiles.data_source_id logger.info(f"Process data source {data_source_id=}") dbt_tables = DbtModelTables.from_local(args.gooddata_upper_case, all_model_ids) - if args.gooddata_upper_case: - dbt_target.schema = dbt_target.schema.upper() - dbt_target.database = dbt_target.database.upper() logger.info(f"Register data source {data_source_id=} schema={dbt_tables.schema_name}") data_source = dbt_target.to_gooddata(data_source_id, dbt_tables.schema_name) sdk_wrapper.sdk.catalog_data_source.create_or_update_data_source(data_source) - logger.info("Generate and put PDM") - generate_and_put_pdm(logger, sdk_wrapper.sdk, data_source_id, dbt_tables) - def upload_notification(logger: logging.Logger, sdk: GoodDataSdk, data_source_id: str) -> None: logger.info(f"Upload notification {data_source_id=}") diff --git a/gooddata-dbt/tests/resources/dbt_target/manifest.json b/gooddata-dbt/tests/resources/dbt_target/manifest.json index 188632995..24e01d028 100644 --- a/gooddata-dbt/tests/resources/dbt_target/manifest.json +++ b/gooddata-dbt/tests/resources/dbt_target/manifest.json @@ -11,159 +11,6 @@ "adapter_type": "postgres" }, "nodes": { - "model.cicd_demo.exchange_rate": { - "database": "demo", - "schema": "cicd_output_stage", - "name": "exchange_rate", - "resource_type": "model", - "package_name": "cicd_demo", - "path": "exchange_rate_host/exchange_rate.sql", - "original_file_path": "models/exchange_rate_host/exchange_rate.sql", - "unique_id": "model.cicd_demo.exchange_rate", - "fqn": [ - "cicd_demo", - "exchange_rate_host", - "exchange_rate" - ], - "alias": "exchange_rate", - "checksum": { - "name": "sha256", - "checksum": "317f7efa72d909d2bb0adff621bc74b8356609dee2b57d8638bd276d7b045e87" - }, - "config": { - "enabled": true, - "alias": null, - "schema": "cicd_output_stage", - "database": null, - "tags": [], - "meta": { - "gooddata": { - "model_id": "github" - } - }, - "materialized": "incremental", - "incremental_strategy": null, - "persist_docs": {}, - "quoting": {}, - "column_types": {}, - "full_refresh": null, - "unique_key": "created_at", - "on_schema_change": "ignore", - "grants": {}, - "packages": [], - "docs": { - "show": true, - "node_color": null - }, - "indexes": [ - { - "columns": [ - "created_at" - ], - "unique": true - } - ], - "post-hook": [], - "pre-hook": [] - }, - "tags": [], - "description": "Exchange rates", - "columns": { - "created_at": { - "name": "created_at", - "description": "", - "meta": {}, - "data_type": null, - "quote": null, - "tags": [] - }, - "usd": { - "name": "usd", - "description": "", - "meta": { - "gooddata": { - "ldm_type": "fact" - } - }, - "data_type": null, - "quote": null, - "tags": [] - }, - "czk": { - "name": "czk", - "description": "", - "meta": { - "gooddata": { - "ldm_type": "fact" - } - }, - "data_type": null, - "quote": null, - "tags": [] - }, - "btc": { - "name": "btc", - "description": "", - "meta": { - "gooddata": { - "ldm_type": "fact" - } - }, - "data_type": null, - "quote": null, - "tags": [] - } - }, - "meta": { - "gooddata": { - "model_id": "github" - } - }, - "docs": { - "show": true, - "node_color": null - }, - "patch_path": "cicd_demo://models/exchange_rate_host/schema.yml", - "build_path": null, - "deferred": false, - "unrendered_config": { - "schema": "{{ env_var('OUTPUT_SCHEMA', 'cicd_output_stage') }}", - "materialized": "incremental", - "meta": { - "gooddata": { - "model_id": "github" - } - }, - "indexes": [ - { - "columns": [ - "created_at" - ], - "unique": true - } - ], - "unique_key": "created_at" - }, - "created_at": 1692692941.444701, - "relation_name": "\"demo\".\"cicd_output_stage\".\"exchange_rate\"", - "raw_code": "{{ config(\n indexes=[\n {'columns': ['created_at'], 'unique': true}\n ],\n materialized='incremental',\n unique_key='created_at'\n) }}\n\nwith using_clause as (\n select\n *\n from {{ var(\"input_schema_exchangeratehost\") }}.exchange_rate\n {% if is_incremental() %}\n where \"{{ get_db_entity_name('date') }}\" > ( select max(created_at) from {{ this }} )\n {% endif %}\n),\n\nupdates as (\n select *\n from using_clause\n {% if is_incremental() %}\n where \"{{ get_db_entity_name('date') }}\" in ( select created_at from {{ this }} )\n {% else %}\n -- No updates when doing full load\n where 1 = 0\n {% endif %}\n),\n\ninserts as (\n select *\n from using_clause\n {% if is_incremental() %}\n where \"{{ get_db_entity_name('date') }}\" not in ( select created_at from {{ this }} )\n {% endif %}\n),\n\nfinal as (\n select\n \"{{ get_db_entity_name('date') }}\" as created_at,\n usd,\n czk,\n btc\n from (\n select * from inserts\n union all select * from updates\n ) c\n)\n\nselect * from final", - "language": "sql", - "refs": [], - "sources": [], - "metrics": [], - "depends_on": { - "macros": [ - "macro.dbt.is_incremental", - "macro.cicd_demo.get_db_entity_name" - ], - "nodes": [] - }, - "compiled_path": "target/compiled/cicd_demo/models/exchange_rate_host/exchange_rate.sql", - "compiled": true, - "compiled_code": "\n\nwith using_clause as (\n select\n *\n from exchangeratehost_input_stage.exchange_rate\n \n where \"date\" > ( select max(created_at) from \"demo\".\"cicd_output_stage\".\"exchange_rate\" )\n \n),\n\nupdates as (\n select *\n from using_clause\n \n where \"date\" in ( select created_at from \"demo\".\"cicd_output_stage\".\"exchange_rate\" )\n \n),\n\ninserts as (\n select *\n from using_clause\n \n where \"date\" not in ( select created_at from \"demo\".\"cicd_output_stage\".\"exchange_rate\" )\n \n),\n\nfinal as (\n select\n \"date\" as created_at,\n usd,\n czk,\n btc\n from (\n select * from inserts\n union all select * from updates\n ) c\n)\n\nselect * from final", - "extra_ctes_injected": true, - "extra_ctes": [] - }, "model.cicd_demo.commits": { "database": "demo", "schema": "cicd_output_stage", diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft.yaml new file mode 100644 index 000000000..fdb873ad0 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft.yaml @@ -0,0 +1,15 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: aircraft_model_code + referencedTableColumn: aircraft_model_code + referencedTableId: aircraft_models + - dataType: STRING + isPrimaryKey: true + name: tail_num +id: aircraft +path: + - cicd_output_stage + - aircraft +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft_models.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft_models.yaml new file mode 100644 index 000000000..d28d89c8b --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/aircraft_models.yaml @@ -0,0 +1,16 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: true + name: aircraft_model_code + - dataType: STRING + isPrimaryKey: false + name: manufacturer + - dataType: INT + isPrimaryKey: false + name: seats +id: aircraft_models +path: + - cicd_output_stage + - aircraft_models +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/airports.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/airports.yaml new file mode 100644 index 000000000..13ad88856 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/airports.yaml @@ -0,0 +1,31 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: true + name: code + - dataType: INT + isPrimaryKey: false + name: elevation + - dataType: STRING + isPrimaryKey: false + name: faa_region + - dataType: STRING + isPrimaryKey: false + name: fac_type + - dataType: STRING + isPrimaryKey: false + name: latitude + - dataType: STRING + isPrimaryKey: false + name: longitude + - dataType: STRING + isPrimaryKey: false + name: name + - dataType: STRING + isPrimaryKey: false + name: state +id: airports +path: + - cicd_output_stage + - airports +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/ambient_temperature.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/ambient_temperature.yaml new file mode 100644 index 000000000..176e32b2a --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/ambient_temperature.yaml @@ -0,0 +1,13 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: NUMERIC + isPrimaryKey: false + name: temperature + - dataType: TIMESTAMP + isPrimaryKey: false + name: timestamp +id: ambient_temperature +path: + - cicd_output_stage + - ambient_temperature +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/carriers.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/carriers.yaml new file mode 100644 index 000000000..7ffc55478 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/carriers.yaml @@ -0,0 +1,16 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: true + name: code + - dataType: STRING + isPrimaryKey: false + name: name + - dataType: STRING + isPrimaryKey: false + name: nickname +id: carriers +path: + - cicd_output_stage + - carriers +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/census.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/census.yaml new file mode 100644 index 000000000..5638143ae --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/census.yaml @@ -0,0 +1,16 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: continent + - dataType: NUMERIC + isPrimaryKey: false + name: population + - dataType: TIMESTAMP + isPrimaryKey: false + name: timestamp +id: census +path: + - cicd_output_stage + - census +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/census_by_country.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/census_by_country.yaml new file mode 100644 index 000000000..30b45cbc4 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/census_by_country.yaml @@ -0,0 +1,16 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: country + - dataType: NUMERIC + isPrimaryKey: false + name: population + - dataType: TIMESTAMP + isPrimaryKey: false + name: timestamp +id: census_by_country +path: + - cicd_output_stage + - census_by_country +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/commits.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/commits.yaml new file mode 100644 index 000000000..2ad580359 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/commits.yaml @@ -0,0 +1,29 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: INT + isPrimaryKey: false + name: comment_count + - dataType: STRING + isPrimaryKey: true + name: commit_id + - dataType: STRING + isPrimaryKey: false + name: commit_url + - dataType: TIMESTAMP + isPrimaryKey: false + name: created_at + - dataType: INT + isPrimaryKey: false + name: repo_id + referencedTableColumn: repo_id + referencedTableId: repos + - dataType: INT + isPrimaryKey: false + name: user_id + referencedTableColumn: user_id + referencedTableId: users +id: commits +path: + - cicd_output_stage + - commits +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/customer.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/customer.yaml new file mode 100644 index 000000000..1fe7bbce1 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/customer.yaml @@ -0,0 +1,37 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: customer_city + - dataType: STRING + isPrimaryKey: false + name: customer_country + - dataType: TIMESTAMP + isPrimaryKey: false + name: customer_created_date + - dataType: STRING + isPrimaryKey: false + name: customer_email + - dataType: STRING + isPrimaryKey: true + name: customer_id + - dataType: STRING + isPrimaryKey: false + name: customer_state + - dataType: STRING + isPrimaryKey: false + name: geo__customer_city__city_pushpin_latitude + - dataType: STRING + isPrimaryKey: false + name: geo__customer_city__city_pushpin_longitude + - dataType: STRING + isPrimaryKey: false + name: ls__customer_id__customer_name + - dataType: STRING + isPrimaryKey: false + name: wdf__client_id +id: customer +path: + - cicd_output_stage + - customer +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/eshop_customers.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/eshop_customers.yaml new file mode 100644 index 000000000..ac1b5e122 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/eshop_customers.yaml @@ -0,0 +1,37 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: NUMERIC + isPrimaryKey: false + name: clv + - dataType: STRING + isPrimaryKey: false + name: country + - dataType: STRING + isPrimaryKey: false + name: email + - dataType: STRING + isPrimaryKey: false + name: first_name + - dataType: STRING + isPrimaryKey: false + name: gender + - dataType: STRING + isPrimaryKey: true + name: id + - dataType: STRING + isPrimaryKey: false + name: last_name + - dataType: NUMERIC + isPrimaryKey: false + name: nps + - dataType: STRING + isPrimaryKey: false + name: username + - dataType: NUMERIC + isPrimaryKey: false + name: visits +id: eshop_customers +path: + - cicd_output_stage + - eshop_customers +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/flights.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/flights.yaml new file mode 100644 index 000000000..3b3409e95 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/flights.yaml @@ -0,0 +1,58 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: INT + isPrimaryKey: false + name: arr_delay + - dataType: STRING + isPrimaryKey: false + name: cancelled + - dataType: STRING + isPrimaryKey: false + name: carrier + referencedTableColumn: code + referencedTableId: carriers + - dataType: INT + isPrimaryKey: false + name: dep_delay + - dataType: TIMESTAMP + isPrimaryKey: false + name: dep_time + - dataType: STRING + isPrimaryKey: false + name: destination + - dataType: INT + isPrimaryKey: false + name: distance + - dataType: STRING + isPrimaryKey: false + name: diverted + - dataType: STRING + isPrimaryKey: false + name: flight_num + - dataType: INT + isPrimaryKey: false + name: flight_time + - dataType: STRING + isPrimaryKey: true + name: id2 + - dataType: STRING + isPrimaryKey: false + name: origin + referencedTableColumn: code + referencedTableId: airports + - dataType: STRING + isPrimaryKey: false + name: tail_num + referencedTableColumn: tail_num + referencedTableId: aircraft + - dataType: INT + isPrimaryKey: false + name: taxi_in + - dataType: INT + isPrimaryKey: false + name: taxi_out +id: flights +path: + - cicd_output_stage + - flights +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/home_rentals.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/home_rentals.yaml new file mode 100644 index 000000000..a85c49bbf --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/home_rentals.yaml @@ -0,0 +1,34 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: INT + isPrimaryKey: false + name: days_on_market + - dataType: NUMERIC + isPrimaryKey: false + name: initial_price + - dataType: STRING + isPrimaryKey: false + name: location + - dataType: STRING + isPrimaryKey: false + name: neighborhood + - dataType: INT + isPrimaryKey: false + name: number_of_bathrooms + - dataType: INT + isPrimaryKey: false + name: number_of_rooms + - dataType: INT + isPrimaryKey: false + name: number_of_rooms_fact + - dataType: NUMERIC + isPrimaryKey: false + name: rental_price + - dataType: NUMERIC + isPrimaryKey: false + name: square_feets +id: home_rentals +path: + - cicd_output_stage + - home_rentals +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/house_property_sales_time_series.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/house_property_sales_time_series.yaml new file mode 100644 index 000000000..8ececba2b --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/house_property_sales_time_series.yaml @@ -0,0 +1,19 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: bedrooms + - dataType: NUMERIC + isPrimaryKey: false + name: ma + - dataType: TIMESTAMP + isPrimaryKey: false + name: saledate + - dataType: STRING + isPrimaryKey: false + name: type +id: house_property_sales_time_series +path: + - cicd_output_stage + - house_property_sales_time_series +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/mall_customers.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/mall_customers.yaml new file mode 100644 index 000000000..d7e8f778b --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/mall_customers.yaml @@ -0,0 +1,22 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: NUMERIC + isPrimaryKey: false + name: age + - dataType: NUMERIC + isPrimaryKey: false + name: annualincome + - dataType: STRING + isPrimaryKey: true + name: customerid + - dataType: STRING + isPrimaryKey: false + name: gender + - dataType: NUMERIC + isPrimaryKey: false + name: spendingscore +id: mall_customers +path: + - cicd_output_stage + - mall_customers +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/monthlyinventory.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/monthlyinventory.yaml new file mode 100644 index 000000000..0a16a62b0 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/monthlyinventory.yaml @@ -0,0 +1,30 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: TIMESTAMP + isPrimaryKey: false + name: date + - dataType: TIMESTAMP + isPrimaryKey: false + name: inventory_month + - dataType: STRING + isPrimaryKey: true + name: monthly_inventory_id + - dataType: NUMERIC + isPrimaryKey: false + name: monthly_quantity_bom + - dataType: NUMERIC + isPrimaryKey: false + name: monthly_quantity_eom + - dataType: STRING + isPrimaryKey: false + name: product__product_id + referencedTableColumn: product_id + referencedTableId: product + - dataType: STRING + isPrimaryKey: false + name: wdf__client_id +id: monthlyinventory +path: + - cicd_output_stage + - monthlyinventory +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/order_lines.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/order_lines.yaml new file mode 100644 index 000000000..7bf921689 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/order_lines.yaml @@ -0,0 +1,49 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: customer__customer_id + referencedTableColumn: customer_id + referencedTableId: customer + - dataType: STRING + isPrimaryKey: false + name: customer_age + - dataType: TIMESTAMP + isPrimaryKey: false + name: date + - dataType: STRING + isPrimaryKey: false + name: order__order_id + referencedTableColumn: order_id + referencedTableId: orders + - dataType: TIMESTAMP + isPrimaryKey: false + name: order_date + - dataType: STRING + isPrimaryKey: true + name: order_line_id + - dataType: NUMERIC + isPrimaryKey: false + name: order_unit_cost + - dataType: NUMERIC + isPrimaryKey: false + name: order_unit_discount + - dataType: NUMERIC + isPrimaryKey: false + name: order_unit_price + - dataType: NUMERIC + isPrimaryKey: false + name: order_unit_quantity + - dataType: STRING + isPrimaryKey: false + name: product__product_id + referencedTableColumn: product_id + referencedTableId: product + - dataType: STRING + isPrimaryKey: false + name: wdf__client_id +id: order_lines +path: + - cicd_output_stage + - order_lines +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/orders.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/orders.yaml new file mode 100644 index 000000000..1206afe4f --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/orders.yaml @@ -0,0 +1,16 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: true + name: order_id + - dataType: STRING + isPrimaryKey: false + name: order_status + - dataType: STRING + isPrimaryKey: false + name: wdf__client_id +id: orders +path: + - cicd_output_stage + - orders +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/product.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/product.yaml new file mode 100644 index 000000000..18029adeb --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/product.yaml @@ -0,0 +1,37 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: ls__product_id__product_id_image_web + - dataType: STRING + isPrimaryKey: false + name: ls__product_id__product_name + - dataType: STRING + isPrimaryKey: false + name: ls__product_image__product_image_web + - dataType: STRING + isPrimaryKey: false + name: product_brand + - dataType: STRING + isPrimaryKey: false + name: product_category + - dataType: STRING + isPrimaryKey: true + name: product_id + - dataType: STRING + isPrimaryKey: false + name: product_image + - dataType: STRING + isPrimaryKey: false + name: product_rating + - dataType: NUMERIC + isPrimaryKey: false + name: rating + - dataType: STRING + isPrimaryKey: false + name: wdf__product_category +id: product +path: + - cicd_output_stage + - product +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/pull_requests.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/pull_requests.yaml new file mode 100644 index 000000000..785151da2 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/pull_requests.yaml @@ -0,0 +1,47 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: TIMESTAMP + isPrimaryKey: false + name: closed_at + - dataType: TIMESTAMP + isPrimaryKey: false + name: created_at + - dataType: NUMERIC + isPrimaryKey: false + name: days_to_solve + - dataType: INT + isPrimaryKey: false + name: id + - dataType: TIMESTAMP + isPrimaryKey: false + name: merged_at + - dataType: BOOLEAN + isPrimaryKey: false + name: pull_request_draft + - dataType: STRING + isPrimaryKey: true + name: pull_request_id + - dataType: INT + isPrimaryKey: false + name: pull_request_number + - dataType: STRING + isPrimaryKey: false + name: pull_request_title + - dataType: STRING + isPrimaryKey: false + name: pull_request_url + - dataType: INT + isPrimaryKey: false + name: repo_id + referencedTableColumn: repo_id + referencedTableId: repos + - dataType: INT + isPrimaryKey: false + name: user_id + referencedTableColumn: user_id + referencedTableId: users +id: pull_requests +path: + - cicd_output_stage + - pull_requests +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/repos.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/repos.yaml new file mode 100644 index 000000000..f141a0261 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/repos.yaml @@ -0,0 +1,25 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: TIMESTAMP + isPrimaryKey: false + name: repo_created_at + - dataType: INT + isPrimaryKey: true + name: repo_id + - dataType: STRING + isPrimaryKey: false + name: repo_name + - dataType: STRING + isPrimaryKey: false + name: repo_url + - dataType: INT + isPrimaryKey: false + name: stargazers_count + - dataType: INT + isPrimaryKey: false + name: watchers_count +id: repos +path: + - cicd_output_stage + - repos +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/returns.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/returns.yaml new file mode 100644 index 000000000..57d33a965 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/returns.yaml @@ -0,0 +1,43 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: customer__customer_id + referencedTableColumn: customer_id + referencedTableId: customer + - dataType: TIMESTAMP + isPrimaryKey: false + name: date + - dataType: STRING + isPrimaryKey: false + name: order__order_id + referencedTableColumn: order_id + referencedTableId: orders + - dataType: STRING + isPrimaryKey: false + name: product__product_id + referencedTableColumn: product_id + referencedTableId: product + - dataType: TIMESTAMP + isPrimaryKey: false + name: return_date + - dataType: STRING + isPrimaryKey: true + name: return_id + - dataType: NUMERIC + isPrimaryKey: false + name: return_unit_cost + - dataType: NUMERIC + isPrimaryKey: false + name: return_unit_paid_amount + - dataType: NUMERIC + isPrimaryKey: false + name: return_unit_quantity + - dataType: STRING + isPrimaryKey: false + name: wdf__client_id +id: returns +path: + - cicd_output_stage + - returns +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/telco_customer_churn.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/telco_customer_churn.yaml new file mode 100644 index 000000000..7e006cdf1 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/telco_customer_churn.yaml @@ -0,0 +1,70 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: STRING + isPrimaryKey: false + name: churn + - dataType: STRING + isPrimaryKey: false + name: contract + - dataType: STRING + isPrimaryKey: true + name: customerid + - dataType: STRING + isPrimaryKey: false + name: dependents + - dataType: STRING + isPrimaryKey: false + name: deviceprotection + - dataType: STRING + isPrimaryKey: false + name: gender + - dataType: STRING + isPrimaryKey: false + name: internetservice + - dataType: NUMERIC + isPrimaryKey: false + name: monthlycharges + - dataType: STRING + isPrimaryKey: false + name: multiplelines + - dataType: STRING + isPrimaryKey: false + name: onlinebackup + - dataType: STRING + isPrimaryKey: false + name: onlinesecurity + - dataType: STRING + isPrimaryKey: false + name: paperlessbilling + - dataType: STRING + isPrimaryKey: false + name: partner + - dataType: STRING + isPrimaryKey: false + name: paymentmethod + - dataType: STRING + isPrimaryKey: false + name: phoneservice + - dataType: STRING + isPrimaryKey: false + name: seniorcitizen + - dataType: STRING + isPrimaryKey: false + name: streamingmovies + - dataType: STRING + isPrimaryKey: false + name: streamingtv + - dataType: STRING + isPrimaryKey: false + name: techsupport + - dataType: STRING + isPrimaryKey: false + name: tenure + - dataType: STRING + isPrimaryKey: false + name: totalcharges +id: telco_customer_churn +path: + - cicd_output_stage + - telco_customer_churn +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/users.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/users.yaml new file mode 100644 index 000000000..66930272e --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/users.yaml @@ -0,0 +1,22 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: TIMESTAMP + isPrimaryKey: false + name: last_updated + - dataType: STRING + isPrimaryKey: false + name: login + - dataType: STRING + isPrimaryKey: false + name: user_avatar_url + - dataType: INT + isPrimaryKey: true + name: user_id + - dataType: STRING + isPrimaryKey: false + name: user_url +id: users +path: + - cicd_output_stage + - users +type: TABLE diff --git a/gooddata-dbt/tests/resources/gooddata_layouts/pdm/workflow_runs.yaml b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/workflow_runs.yaml new file mode 100644 index 000000000..729e578a1 --- /dev/null +++ b/gooddata-dbt/tests/resources/gooddata_layouts/pdm/workflow_runs.yaml @@ -0,0 +1,33 @@ +# (C) 2023 GoodData Corporation +columns: + - dataType: TIMESTAMP + isPrimaryKey: false + name: created_at + - dataType: INT + isPrimaryKey: false + name: repo_id + referencedTableColumn: repo_id + referencedTableId: repos + - dataType: TIMESTAMP + isPrimaryKey: false + name: updated_at + - dataType: NUMERIC + isPrimaryKey: false + name: workflow_run_duration + - dataType: STRING + isPrimaryKey: false + name: workflow_run_event + - dataType: INT + isPrimaryKey: true + name: workflow_run_id + - dataType: STRING + isPrimaryKey: false + name: workflow_run_status + - dataType: STRING + isPrimaryKey: false + name: workflow_run_url +id: workflow_runs +path: + - cicd_output_stage + - workflow_runs +type: TABLE diff --git a/gooddata-dbt/tests/test_tables.py b/gooddata-dbt/tests/test_tables.py index e21a3b62c..5b19f0f5a 100644 --- a/gooddata-dbt/tests/test_tables.py +++ b/gooddata-dbt/tests/test_tables.py @@ -5,8 +5,12 @@ from gooddata_dbt.dbt.tables import DbtModelTables +from gooddata_sdk import CatalogDeclarativeModel, CatalogDeclarativeTables + _CURR_DIR = Path(__file__).parent _MANIFEST_PATH = _CURR_DIR / "resources/dbt_target/manifest.json" +_PDM_PATH = _CURR_DIR / "resources/gooddata_layouts" +MODEL_ID = "github" def _read_json(path: Union[str, Path]) -> Dict: @@ -15,20 +19,33 @@ def _read_json(path: Union[str, Path]) -> Dict: def test_load_tables(): - tables = DbtModelTables.from_local(upper_case=False, all_model_ids=["github"], manifest_path=_MANIFEST_PATH) + tables = DbtModelTables.from_local(upper_case=False, all_model_ids=[MODEL_ID], manifest_path=_MANIFEST_PATH) - assert len(tables.tables) == 5 - assert len(tables.tables[0].columns) == 4 + assert len(tables.tables) == 4 + assert len(tables.tables[0].columns) == 6 - assert tables.tables[0].name == "exchange_rate" + assert tables.tables[0].name == "commits" assert tables.tables[0].columns["created_at"].name == "created_at" def test_load_tables_upper_case(): - tables = DbtModelTables.from_local(upper_case=True, all_model_ids=["github"], manifest_path=_MANIFEST_PATH) + tables = DbtModelTables.from_local(upper_case=True, all_model_ids=[MODEL_ID], manifest_path=_MANIFEST_PATH) - assert len(tables.tables) == 5 - assert len(tables.tables[0].columns) == 4 + assert len(tables.tables) == 4 + assert len(tables.tables[0].columns) == 6 - assert tables.tables[0].name == "EXCHANGE_RATE" + assert tables.tables[0].name == "COMMITS" assert tables.tables[0].columns["created_at"].name == "CREATED_AT" + + +def test_make_ldm(): + tables = DbtModelTables.from_local(upper_case=False, all_model_ids=[MODEL_ID], manifest_path=_MANIFEST_PATH) + scan_pdm = CatalogDeclarativeTables.load_from_disk(_PDM_PATH) + tables.set_data_types(scan_pdm) + data_source_id = "postgres" + + declarative_datasets = tables.make_declarative_datasets(data_source_id, [MODEL_ID]) + ldm = CatalogDeclarativeModel.from_dict({"ldm": declarative_datasets}, camel_case=False) + + assert len(ldm.ldm.datasets) == 4 + assert len(ldm.ldm.date_instances) == 4 From 196ad566acfb3711d34cfbf84053681f95d6507a Mon Sep 17 00:00:00 2001 From: Jacek Date: Tue, 7 Nov 2023 17:47:32 +0100 Subject: [PATCH 3/5] TRIVIAL: gooddata-dbt - fix and complete gooddata_example.yml --- gooddata-dbt/tests/gooddata_example.yml | 26 ++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/gooddata-dbt/tests/gooddata_example.yml b/gooddata-dbt/tests/gooddata_example.yml index 5af0b0a4c..85e992965 100644 --- a/gooddata-dbt/tests/gooddata_example.yml +++ b/gooddata-dbt/tests/gooddata_example.yml @@ -19,9 +19,15 @@ data_products: - id: sales name: "Sales" environment_setup_id: default - model_id: github + model_ids: - github - faa + # Example of why and how to use skip_tests: + # TODO - this insight cannot be calculated by Python SDK: + # The limit for maximum size of the dimension without metrics was exceeded(100_000). + # Fix it either in Python SDK or in the insight + skip_tests: + - "insight_1" localization: from_language: en to: @@ -29,3 +35,21 @@ data_products: language: fr - locale: zh-Hans language: "chinese (simplified)" + - id: marketing + name: "Sales" + environment_setup_id: default + model_ids: + - faa + + +# It is possible to deliver to multiple GoodData organizations +# You have to set up a gooddata profile file and configure all organizations there +# Related DOC - https://www.gooddata.com/docs/python-sdk/latest/getting-started/ +organizations: + - gooddata_profile: local + data_product_ids: + - sales + - gooddata_profile: production + data_product_ids: + - sales + - marketing From 5a66a06140a254a228086fd1f67ddf092ad85855 Mon Sep 17 00:00:00 2001 From: Jacek Date: Fri, 10 Nov 2023 14:47:00 +0100 Subject: [PATCH 4/5] TRIVIAL: gooddata-dbt - run test insights in parallel Parallelism configurable in gooddata.yml --- gooddata-dbt/gooddata_dbt/dbt_plugin.py | 77 ++++++++++++++++---- gooddata-dbt/gooddata_dbt/gooddata/config.py | 6 ++ gooddata-dbt/tests/gooddata_example.yml | 4 + 3 files changed, 74 insertions(+), 13 deletions(-) diff --git a/gooddata-dbt/gooddata_dbt/dbt_plugin.py b/gooddata-dbt/gooddata_dbt/dbt_plugin.py index 905a6962a..63d4a3de1 100644 --- a/gooddata-dbt/gooddata_dbt/dbt_plugin.py +++ b/gooddata-dbt/gooddata_dbt/dbt_plugin.py @@ -1,8 +1,9 @@ # (C) 2023 GoodData Corporation +import asyncio import logging import os -import sys from argparse import Namespace +from asyncio import Semaphore from pathlib import Path from time import time from typing import List, Optional @@ -18,7 +19,7 @@ from gooddata_dbt.sdk_wrapper import GoodDataSdkWrapper from gooddata_dbt.utils import report_message_to_merge_request -from gooddata_sdk import CatalogDeclarativeModel, CatalogScanModelRequest, CatalogWorkspace, GoodDataSdk +from gooddata_sdk import CatalogDeclarativeModel, CatalogScanModelRequest, CatalogWorkspace, GoodDataSdk, Insight # TODO - upgrade AIO, cleanup, start from scratch, test everything @@ -129,22 +130,69 @@ def store_analytics( ) -def test_insights(logger: logging.Logger, sdk: GoodDataSdk, workspace_id: str, skip_tests: Optional[List[str]]) -> None: +async def execute_insight(sdk: GoodDataSdk, workspace_id: str, insight: Insight) -> None: + sdk.tables.for_insight(workspace_id, insight) + + +async def test_insight( + logger: logging.Logger, + sdk: GoodDataSdk, + workspace_id: str, + insight: Insight, +) -> dict: + logger.info(f"Executing insight {insight.id=} {insight.title=} ...") + start = time() + try: + await execute_insight(sdk, workspace_id, insight) + duration = int((time() - start) * 1000) + logger.info(f"Test successful {insight.id=} {insight.title=} duration={duration}(ms)") + return {"id": insight.id, "title": insight.title, "duration": duration, "status": "success"} + except Exception as e: + duration = int((time() - start) * 1000) + logger.error(f"Test failed {insight.id=} {insight.title=} duration={duration}(ms) reason={str(e)}") + return {"id": insight.id, "title": insight.title, "duration": duration, "status": "failed", "reason": str(e)} + + +async def safe_test_insight( + logger: logging.Logger, + sdk: GoodDataSdk, + workspace_id: str, + insight: Insight, + semaphore: Semaphore, +) -> dict: + async with semaphore: # semaphore limits num of simultaneous executions + return await test_insight( + logger, + sdk, + workspace_id, + insight, + ) + + +async def test_insights( + logger: logging.Logger, + sdk: GoodDataSdk, + workspace_id: str, + skip_tests: Optional[List[str]], + test_insights_parallelism: int = 1, +) -> None: + start = time() logger.info(f"Test insights {workspace_id=}") insights = sdk.insights.get_insights(workspace_id) - + semaphore = asyncio.Semaphore(test_insights_parallelism) + tasks = [] for insight in insights: - logger.info(f"Executing insight {insight.id=} {insight.title=} ...") if skip_tests is not None and insight.id in skip_tests: logger.info(f"Skip test insight={insight.title} (requested in gooddata.yaml)") else: - try: - start = time() - sdk.tables.for_insight(workspace_id, insight) - duration = int((time() - start) * 1000) - logger.info(f"Test successful {insight.id=} {insight.title=} duration={duration}(ms)") - except RuntimeError: - sys.exit() + tasks.append(safe_test_insight(logger, sdk, workspace_id, insight, semaphore)) + results = await asyncio.gather(*tasks) + duration = int((time() - start) * 1000) + errors = [result for result in results if result["status"] == "failed"] + if len(errors) > 0: + raise Exception(f"Test insights failed {workspace_id=} duration={duration}(ms) errors={errors}") + else: + logger.info(f"Test insights finished {workspace_id=} duration={duration}(ms)") def create_localized_workspaces(data_product: GoodDataConfigProduct, sdk: GoodDataSdk, workspace_id: str) -> None: @@ -309,7 +357,10 @@ def process_organization( elif args.method == "deploy_analytics": deploy_analytics(logger, sdk_wrapper, workspace_id, data_product) elif args.method == "test_insights": - test_insights(logger, sdk_wrapper.sdk, workspace_id, data_product.skip_tests) + parallelism = gd_config.global_properties.test_insights_parallelism or 1 + asyncio.run( + test_insights(logger, sdk_wrapper.sdk, workspace_id, data_product.skip_tests, parallelism) + ) else: raise Exception(f"Unsupported method requested in args: {args.method}") diff --git a/gooddata-dbt/gooddata_dbt/gooddata/config.py b/gooddata-dbt/gooddata_dbt/gooddata/config.py index 7f70001de..aa23cfb1d 100644 --- a/gooddata-dbt/gooddata_dbt/gooddata/config.py +++ b/gooddata-dbt/gooddata_dbt/gooddata/config.py @@ -47,11 +47,17 @@ class GoodDataConfigOrganization(Base): data_product_ids: List[str] = attr.field(default=list) +@attrs.define(auto_attribs=True, kw_only=True) +class GoodDataGlobalConfig(Base): + test_insights_parallelism: Optional[int] = 1 + + @attrs.define(auto_attribs=True, kw_only=True) class GoodDataConfig(Base): environment_setups: List[GoodDataConfigEnvironmentSetup] data_products: List[GoodDataConfigProduct] organizations: List[GoodDataConfigOrganization] + global_properties: GoodDataGlobalConfig @property def all_model_ids(self) -> List[str]: diff --git a/gooddata-dbt/tests/gooddata_example.yml b/gooddata-dbt/tests/gooddata_example.yml index 85e992965..6d6159efe 100644 --- a/gooddata-dbt/tests/gooddata_example.yml +++ b/gooddata-dbt/tests/gooddata_example.yml @@ -53,3 +53,7 @@ organizations: data_product_ids: - sales - marketing + +# Global configuration for all data products +global_properties: + test_insights_parallelism: 2 From 5503a44bc4fe9fc5735fdc9a719c0ba5cb733f43 Mon Sep 17 00:00:00 2001 From: Jacek Date: Mon, 13 Nov 2023 10:36:43 +0100 Subject: [PATCH 5/5] TRIVIAL: gooddata-dbt - utility function for get duration plus unify F strings --- gooddata-dbt/gooddata_dbt/dbt_plugin.py | 12 ++++++------ gooddata-dbt/gooddata_dbt/utils.py | 5 +++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/gooddata-dbt/gooddata_dbt/dbt_plugin.py b/gooddata-dbt/gooddata_dbt/dbt_plugin.py index 63d4a3de1..a437f01ec 100644 --- a/gooddata-dbt/gooddata_dbt/dbt_plugin.py +++ b/gooddata-dbt/gooddata_dbt/dbt_plugin.py @@ -17,7 +17,7 @@ from gooddata_dbt.gooddata.config import GoodDataConfig, GoodDataConfigOrganization, GoodDataConfigProduct from gooddata_dbt.logger import get_logger from gooddata_dbt.sdk_wrapper import GoodDataSdkWrapper -from gooddata_dbt.utils import report_message_to_merge_request +from gooddata_dbt.utils import get_duration, report_message_to_merge_request from gooddata_sdk import CatalogDeclarativeModel, CatalogScanModelRequest, CatalogWorkspace, GoodDataSdk, Insight @@ -144,11 +144,11 @@ async def test_insight( start = time() try: await execute_insight(sdk, workspace_id, insight) - duration = int((time() - start) * 1000) + duration = get_duration(start) logger.info(f"Test successful {insight.id=} {insight.title=} duration={duration}(ms)") return {"id": insight.id, "title": insight.title, "duration": duration, "status": "success"} except Exception as e: - duration = int((time() - start) * 1000) + duration = get_duration(start) logger.error(f"Test failed {insight.id=} {insight.title=} duration={duration}(ms) reason={str(e)}") return {"id": insight.id, "title": insight.title, "duration": duration, "status": "failed", "reason": str(e)} @@ -187,12 +187,12 @@ async def test_insights( else: tasks.append(safe_test_insight(logger, sdk, workspace_id, insight, semaphore)) results = await asyncio.gather(*tasks) - duration = int((time() - start) * 1000) + duration = get_duration(start) errors = [result for result in results if result["status"] == "failed"] if len(errors) > 0: - raise Exception(f"Test insights failed {workspace_id=} duration={duration}(ms) errors={errors}") + raise Exception(f"Test insights failed {workspace_id=} {duration=}(ms) {errors=}") else: - logger.info(f"Test insights finished {workspace_id=} duration={duration}(ms)") + logger.info(f"Test insights finished {workspace_id=} {duration=}(ms)") def create_localized_workspaces(data_product: GoodDataConfigProduct, sdk: GoodDataSdk, workspace_id: str) -> None: diff --git a/gooddata-dbt/gooddata_dbt/utils.py b/gooddata-dbt/gooddata_dbt/utils.py index e38f1eb67..bdcef60bf 100644 --- a/gooddata-dbt/gooddata_dbt/utils.py +++ b/gooddata-dbt/gooddata_dbt/utils.py @@ -1,5 +1,6 @@ # (C) 2023 GoodData Corporation import os +import time import requests @@ -21,3 +22,7 @@ def report_message_to_merge_request(token: str, text: str) -> None: headers = {"PRIVATE-TOKEN": token, "Content-Type": "application/json"} data = {"body": text} post_gitlab_rest(url, headers, data) + + +def get_duration(start: float) -> int: + return int((time.time() - start) * 1000)