diff --git a/docs/content/api/modules.json.gz b/docs/content/api/modules.json.gz index 01343aaf12cf4..aefacfba2337b 100644 Binary files a/docs/content/api/modules.json.gz and b/docs/content/api/modules.json.gz differ diff --git a/docs/content/api/searchindex.json.gz b/docs/content/api/searchindex.json.gz index a71bd248da98c..a96de8ab1fbbf 100644 Binary files a/docs/content/api/searchindex.json.gz and b/docs/content/api/searchindex.json.gz differ diff --git a/docs/content/api/sections.json.gz b/docs/content/api/sections.json.gz index d61cd91c5b0ea..1ea256c5cce72 100644 Binary files a/docs/content/api/sections.json.gz and b/docs/content/api/sections.json.gz differ diff --git a/docs/content/integrations/powerbi.mdx b/docs/content/integrations/powerbi.mdx new file mode 100644 index 0000000000000..cf2152398dfa4 --- /dev/null +++ b/docs/content/integrations/powerbi.mdx @@ -0,0 +1,167 @@ +--- +title: "Using Dagster with Power BI" +description: Represent your Power BI assets in Dagster +--- + +# Using Dagster with Power BI + + + +This guide provides instructions for using Dagster with Power BI. Your Power BI assets, such as semantic models, data sources, reports, and dashboards, can be represented in the Dagster asset graph, allowing you to track lineage and dependencies between Power BI assets and upstream data assets you are already modeling in Dagster. You can also use Dagster to orchestrate Power BI semantic models, allowing you to trigger refreshes of these models on a cadence or based on upstream data changes. + +## What you'll learn + +- How to represent Power BI assets in the Dagster asset graph, including lineage to other Dagster assets. +- How to customize asset definition metadata for these Power BI assets. +- How to materialize Power BI semantic models from Dagster. +- How to customize how Power BI semantic models are materialized. + +
+ Prerequisites + +- Familiarity with asset definitions and the Dagster asset graph +- Familiarity with Dagster resources - Familiarity with Power BI concepts, like semantic models, data sources, reports, and dashboards +- A Power BI workspace +- A service principal configured to access Power BI, or an API access token. For more information, see [Embed Power BI content with service principal and an application secret](https://learn.microsoft.com/en-us/power-bi/developer/embedded/embed-service-principal) in the Power BI documentation. + +
+ +## Represent Power BI assets in the asset graph + +To load Power BI assets into the Dagster asset graph, you must first construct a `PowerBIWorkspace` resource, which allows Dagster to communicate with your Power BI workspace. You'll need to supply your workspace ID and credentials. You may configure a service principal or use an API access token, which can be passed directly or accessed from the environment using `EnvVar`. + +Dagster can automatically load all semantic models, data sources, reports, and dashboards from your Power BI workspace. Call the `build_defs()` function, which returns a `Definitions` object containing all the asset definitions for these Power BI assets. + +```python file=/integrations/power-bi/representing-power-bi-assets.py +import uuid +from http import client +from typing import cast + +from dagster_powerbi import PowerBIServicePrincipal, PowerBIToken, PowerBIWorkspace + +from dagster import Definitions, EnvVar, asset, define_asset_job + +# Connect using a service principal +resource = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), + ), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) + +# Alternatively, connect directly using an API access token +resource = PowerBIWorkspace( + credentials=PowerBIToken(api_token=EnvVar("POWER_BI_API_TOKEN")), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) + +defs = resource.build_defs() +``` + +### Customize asset definition metadata for Power BI assets + +By default, Dagster will generate asset keys for each Power BI asset based on its type and name and populate default metadata. You can further customize asset properties by passing a custom `DagsterPowerBITranslator` subclass to the `build_defs()` function. This subclass can implement methods to customize the asset keys or specs for each Power BI asset type. 
+ +```python file=/integrations/power-bi/customize-power-bi-asset-defs.py +from dagster_powerbi import ( + DagsterPowerBITranslator, + PowerBIServicePrincipal, + PowerBIWorkspace, +) +from dagster_powerbi.translator import PowerBIContentData + +from dagster import EnvVar +from dagster._core.definitions.asset_key import AssetKey +from dagster._core.definitions.asset_spec import AssetSpec + +resource = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), + ), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) + + +# A translator class lets us customize properties of the built +# Power BI assets, such as the owners or asset key +class MyCustomPowerBITranslator(DagsterPowerBITranslator): + def get_report_spec(self, data: PowerBIContentData) -> AssetSpec: + # We add a team owner tag to all reports + return super().get_report_spec(data)._replace(owners=["my_team"]) + + def get_semantic_model_spec(self, data: PowerBIContentData) -> AssetSpec: + return super().get_semantic_model_spec(data)._replace(owners=["my_team"]) + + def get_dashboard_spec(self, data: PowerBIContentData) -> AssetSpec: + return super().get_dashboard_spec(data)._replace(owners=["my_team"]) + + def get_dashboard_asset_key(self, data: PowerBIContentData) -> AssetKey: + # We prefix all dashboard asset keys with "powerbi" for organizational + # purposes + return super().get_dashboard_asset_key(data).with_prefix("powerbi") + + +defs = resource.build_defs(dagster_powerbi_translator=MyCustomPowerBITranslator) +``` + +### Load Power BI assets from multiple workspaces + +Definitions from multiple Power BI workspaces can be combined by instantiating multiple `PowerBIWorkspace` resources and merging their definitions. 
This lets you view all your Power BI assets in a single asset graph: + +```python file=/integrations/power-bi/multiple-power-bi-workspaces.py +from dagster_powerbi import PowerBIServicePrincipal, PowerBIWorkspace + +from dagster import Definitions, EnvVar + +credentials = PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), +) + +sales_team_workspace = PowerBIWorkspace( + credentials=credentials, + workspace_id="726c94ff-c408-4f43-8edf-61fbfa1753c7", +) + +marketing_team_workspace = PowerBIWorkspace( + credentials=credentials, + workspace_id="8b7f815d-4e64-40dd-993c-cfa4fb12edee", +) + +# We use Definitions.merge to combine the definitions from both workspaces +# into a single set of definitions to load +defs = Definitions.merge( + sales_team_workspace.build_defs(), + marketing_team_workspace.build_defs(), +) +``` + +## Materialize Power BI semantic models from Dagster + +Dagster's default behavior is to pull in representations of Power BI semantic models as external assets, which appear in the asset graph but can't be materialized. 
However, you can instruct Dagster to allow you to materialize these semantic models, refreshing them, by passing `enable_refresh_semantic_models=True` to the `build_defs()` function: + +```python file=/integrations/power-bi/materialize-semantic-models.py +import uuid +from typing import cast + +from dagster_powerbi import PowerBIServicePrincipal, PowerBIWorkspace + +from dagster import Definitions, EnvVar, asset, define_asset_job + +resource = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), + ), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) +defs = resource.build_defs(enable_refresh_semantic_models=True) +``` + +You can then add these semantic models to jobs or as targets of Dagster sensors or schedules to trigger refreshes of the models on a cadence or based on other conditions. diff --git a/docs/next/public/objects.inv b/docs/next/public/objects.inv index 036f4fd4d3e58..32b2e12c3778d 100644 Binary files a/docs/next/public/objects.inv and b/docs/next/public/objects.inv differ diff --git a/docs/vale/styles/config/vocabularies/Dagster/accept.txt b/docs/vale/styles/config/vocabularies/Dagster/accept.txt index 1be9d2d50984b..aee006a72a11b 100644 --- a/docs/vale/styles/config/vocabularies/Dagster/accept.txt +++ b/docs/vale/styles/config/vocabularies/Dagster/accept.txt @@ -78,6 +78,7 @@ PingOne Polars Postgres Prometheus +Power BI Pydantic RBAC RDS @@ -133,3 +134,15 @@ uncomment unpartitioned vCPU vCPUs +we have + + +SLA +SLAs +performant +SOC +GDPR +HIPAA +IAM +ECS +AWS diff --git a/examples/docs_beta_snippets/tox.ini b/examples/docs_beta_snippets/tox.ini index 388fb39ca6364..516ae854d77ad 100644 --- a/examples/docs_beta_snippets/tox.ini +++ b/examples/docs_beta_snippets/tox.ini @@ -45,10 +45,11 @@ deps = integrations: -e ../../python_modules/libraries/dagster-census integrations: -e 
../../python_modules/libraries/dagster-msteams integrations: -e ../../python_modules/libraries/dagster-msteams - integrations: -e ../../python_modules/libraries/dagster-sdf + integrations: -e ../../python_modules/libraries/dagster-sdf integrations: -e ../../python_modules/libraries/dagster-looker integrations: -e ../../python_modules/libraries/dagster-prometheus integrations: -e ../../python_modules/libraries/dagster-openai + integrations: -e ../../python_modules/libraries/dagster-powerbi -e . allowlist_externals = /bin/bash diff --git a/examples/docs_snippets/docs_snippets/integrations/power-bi/customize-power-bi-asset-defs.py b/examples/docs_snippets/docs_snippets/integrations/power-bi/customize-power-bi-asset-defs.py new file mode 100644 index 0000000000000..ecde7ac6f5036 --- /dev/null +++ b/examples/docs_snippets/docs_snippets/integrations/power-bi/customize-power-bi-asset-defs.py @@ -0,0 +1,41 @@ +from dagster_powerbi import ( + DagsterPowerBITranslator, + PowerBIServicePrincipal, + PowerBIWorkspace, +) +from dagster_powerbi.translator import PowerBIContentData + +from dagster import EnvVar +from dagster._core.definitions.asset_key import AssetKey +from dagster._core.definitions.asset_spec import AssetSpec + +resource = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), + ), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) + + +# A translator class lets us customize properties of the built +# Power BI assets, such as the owners or asset key +class MyCustomPowerBITranslator(DagsterPowerBITranslator): + def get_report_spec(self, data: PowerBIContentData) -> AssetSpec: + # We add a team owner tag to all reports + return super().get_report_spec(data)._replace(owners=["my_team"]) + + def get_semantic_model_spec(self, data: PowerBIContentData) -> AssetSpec: + return 
super().get_semantic_model_spec(data)._replace(owners=["my_team"]) + + def get_dashboard_spec(self, data: PowerBIContentData) -> AssetSpec: + return super().get_dashboard_spec(data)._replace(owners=["my_team"]) + + def get_dashboard_asset_key(self, data: PowerBIContentData) -> AssetKey: + # We prefix all dashboard asset keys with "powerbi" for organizational + # purposes + return super().get_dashboard_asset_key(data).with_prefix("powerbi") + + +defs = resource.build_defs(dagster_powerbi_translator=MyCustomPowerBITranslator) diff --git a/examples/docs_snippets/docs_snippets/integrations/power-bi/materialize-semantic-models.py b/examples/docs_snippets/docs_snippets/integrations/power-bi/materialize-semantic-models.py new file mode 100644 index 0000000000000..96be0feb5cd5a --- /dev/null +++ b/examples/docs_snippets/docs_snippets/integrations/power-bi/materialize-semantic-models.py @@ -0,0 +1,16 @@ +import uuid +from typing import cast + +from dagster_powerbi import PowerBIServicePrincipal, PowerBIWorkspace + +from dagster import Definitions, EnvVar, asset, define_asset_job + +resource = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), + ), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) +defs = resource.build_defs(enable_refresh_semantic_models=True) diff --git a/examples/docs_snippets/docs_snippets/integrations/power-bi/multiple-power-bi-workspaces.py b/examples/docs_snippets/docs_snippets/integrations/power-bi/multiple-power-bi-workspaces.py new file mode 100644 index 0000000000000..7f39e07b11116 --- /dev/null +++ b/examples/docs_snippets/docs_snippets/integrations/power-bi/multiple-power-bi-workspaces.py @@ -0,0 +1,26 @@ +from dagster_powerbi import PowerBIServicePrincipal, PowerBIWorkspace + +from dagster import Definitions, EnvVar + +credentials = PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + 
client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), +) + +sales_team_workspace = PowerBIWorkspace( + credentials=credentials, + workspace_id="726c94ff-c408-4f43-8edf-61fbfa1753c7", +) + +marketing_team_workspace = PowerBIWorkspace( + credentials=credentials, + workspace_id="8b7f815d-4e64-40dd-993c-cfa4fb12edee", +) + +# We use Definitions.merge to combine the definitions from both workspaces +# into a single set of definitions to load +defs = Definitions.merge( + sales_team_workspace.build_defs(), + marketing_team_workspace.build_defs(), +) diff --git a/examples/docs_snippets/docs_snippets/integrations/power-bi/representing-power-bi-assets.py b/examples/docs_snippets/docs_snippets/integrations/power-bi/representing-power-bi-assets.py new file mode 100644 index 0000000000000..a413d6dd47e06 --- /dev/null +++ b/examples/docs_snippets/docs_snippets/integrations/power-bi/representing-power-bi-assets.py @@ -0,0 +1,25 @@ +import uuid +from http import client +from typing import cast + +from dagster_powerbi import PowerBIServicePrincipal, PowerBIToken, PowerBIWorkspace + +from dagster import Definitions, EnvVar, asset, define_asset_job + +# Connect using a service principal +resource = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=EnvVar("POWER_BI_CLIENT_ID"), + client_secret=EnvVar("POWER_BI_CLIENT_SECRET"), + tenant_id=EnvVar("POWER_BI_TENANT_ID"), + ), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) + +# Alternatively, connect directly using an API access token +resource = PowerBIWorkspace( + credentials=PowerBIToken(api_token=EnvVar("POWER_BI_API_TOKEN")), + workspace_id=EnvVar("POWER_BI_WORKSPACE_ID"), +) + +defs = resource.build_defs() diff --git a/examples/docs_snippets/tox.ini b/examples/docs_snippets/tox.ini index 3c36a3440afd8..7e2d363d63189 100644 --- a/examples/docs_snippets/tox.ini +++ b/examples/docs_snippets/tox.ini @@ -35,6 +35,7 @@ deps = -e ../../python_modules/libraries/dagster-k8s -e 
../../python_modules/libraries/dagster-pandas -e ../../python_modules/libraries/dagster-postgres + -e ../../python_modules/libraries/dagster-powerbi -e ../../python_modules/libraries/dagster-pyspark -e ../../python_modules/libraries/dagster-slack -e ../../python_modules/libraries/dagster-gcp-pandas diff --git a/scripts/build_and_publish.sh b/scripts/build_and_publish.sh new file mode 100755 index 0000000000000..324b73353c7d0 --- /dev/null +++ b/scripts/build_and_publish.sh @@ -0,0 +1,86 @@ +# How to release: +# 1. ensure you have an API key from the elementl PyPI account (account is in the password manager) +# 2. run this script with the path to the package to release and the version to release +# 3. once prompted, use '__token__' for the username and the API key for the password + +# Define the path to the .pypirc file +PYPIRC_FILE="$HOME/.pypirc" + +PACKAGE_TO_RELEASE_PATH=$1 +VERSION_TO_RELEASE=$2 + +if [ -z "$PACKAGE_TO_RELEASE_PATH" ]; then + echo "Please provide the path to the package to release." + exit 1 +fi +if [ -z "$VERSION_TO_RELEASE" ]; then + echo "Please provide the version to release." + exit 1 +fi + +# Define cleanup function +cleanup() { + echo "Cleaning up..." + rm -rf dist/* +} + +# Set trap to call cleanup function on script exit +trap cleanup EXIT + +# Check if the .pypirc file exists +if [ ! -f "$PYPIRC_FILE" ]; then + echo ".pypirc file not found in $HOME." + + # Prompt the user for the API token + read -p "Enter your API token (must start with 'pypi-'): " API_TOKEN + + # Check if the API token starts with 'pypi-' + if [[ $API_TOKEN != pypi-* ]]; then + echo "Invalid API token. It must start with 'pypi-'." + exit 1 + fi + + # Create the .pypirc file and write the configuration + cat <<EOF > "$PYPIRC_FILE" +[pypi] +username = __token__ +password = $API_TOKEN +EOF + + echo ".pypirc file created successfully." +else + echo ".pypirc file already exists in $HOME. Using that as pypi credentials." 
+fi + +rm -rf dist/* +rm -rf package_prerelease +mkdir -p package_prerelease +cp -R $PACKAGE_TO_RELEASE_PATH/* package_prerelease +pushd package_prerelease + +# Update both a hardcoded version, if set, in setup.py, and +# find where __version__ is set and update it +sed -i "" "s|return \"1!0+dev\"|return \"$VERSION_TO_RELEASE\"|" setup.py +grep -rl "__version__ = \"1!0+dev\"" ./ | xargs sed -i "" "s|\"1!0+dev\"|\"$VERSION_TO_RELEASE\"|" + +echo "Building package..." +python3 -m build +echo "Uploading to pypi..." +# Capture the output of the twine upload command +TWINE_OUTPUT=$(python3 -m twine upload --repository pypi dist/* --verbose 2>&1) +TWINE_EXIT_CODE=$? + +# Check if the output contains a 400 error +if echo "$TWINE_OUTPUT" | grep -q "400 Bad Request"; then + echo "Error: Twine upload failed with a 400 Bad Request error." + echo "Twine output:" + echo "$TWINE_OUTPUT" + exit 1 +elif [ $TWINE_EXIT_CODE -ne 0 ]; then + echo "Error: Twine upload failed with exit code $TWINE_EXIT_CODE." + echo "Twine output:" + echo "$TWINE_OUTPUT" + exit $TWINE_EXIT_CODE +fi + +echo "Upload successful."