diff --git a/.gitattributes b/.gitattributes index 84ef90f32fb6b..dbdb53bb0d1b3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ -*.py diff=python \ No newline at end of file +*.py diff=python +**/uv.lock linguist-generated diff --git a/CHANGES.md b/CHANGES.md index ff54736810ef7..911f8fe2c6e34 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,33 @@ # Changelog +## 1.9.5 (core) / 0.25.5 (libraries) + +### New + +- The automatic run retry daemon has been updated so that there is a single source of truth for whether a run will be retried and whether the retry has been launched. Tags are now added to runs at failure time indicating whether the run will be retried by the automatic retry system. Once the automatic retry has been launched, the run ID of the retry is added to the original run. +- When canceling a backfill of a job, the backfill daemon will now cancel all runs launched by that backfill before marking the backfill as canceled. +- Dagster execution info (tags such as `dagster/run-id`, `dagster/code-location`, `dagster/user`, and Dagster Cloud environment variables) typically attached to external resources is now available under `DagsterRun.dagster_execution_info`. +- `SensorReturnTypesUnion` is now exported for typing the output of sensor functions. +- [dagster-dbt] dbt seeds now get a valid code version (Thanks [@marijncv](https://github.com/marijncv)!). +- Manual and automatic retries of runs launched by backfills that occur while the backfill is still in progress are now incorporated into the backfill's status. +- Manual retries of runs launched by backfills are no longer considered part of the backfill if the backfill is complete when the retry is launched. +- [dagster-fivetran] Fivetran assets can now be materialized using the `FivetranWorkspace.sync_and_poll(…)` method within the definition of a `@fivetran_assets`-decorated asset. +- [dagster-fivetran] `load_fivetran_asset_specs` has been updated to accept an instance of `DagsterFivetranTranslator` or a custom subclass. +- [dagster-fivetran] The `fivetran_assets` decorator was added. It can be used with the `FivetranWorkspace` resource and `DagsterFivetranTranslator` translator to load Fivetran tables for a given connector as assets in Dagster. The `build_fivetran_assets_definitions` factory can be used to create assets for all the connectors in your Fivetran workspace. +- [dagster-aws] `ECSPipesClient.run` now waits up to 70 days for task completion (waiter parameters are configurable) (Thanks [@jenkoian](https://github.com/jenkoian)!). +- [dagster-dbt] Updated the dagster-dbt scaffold template to be compatible with uv (Thanks [@wingyplus](https://github.com/wingyplus)!). +- [dagster-airbyte] A `load_airbyte_cloud_asset_specs` function has been added. It can be used with the `AirbyteCloudWorkspace` resource and `DagsterAirbyteTranslator` translator to load your Airbyte Cloud connection streams as external assets in Dagster. +- [ui] Added an icon for the `icechunk` kind. +- [ui] Improved the UI for manual sensor/schedule evaluation. + +### Bugfixes + +- Fixed a database locking bug for the `ConsolidatedSqliteEventLogStorage`, which is mostly used for tests. +- [dagster-aws] Fixed a bug in the `ECSRunLauncher` that prevented it from accepting a user-provided task definition when `DAGSTER_CURRENT_IMAGE` was not set in the code location. +- [ui] Fixed an issue that would sometimes cause the asset graph to fail to render on initial load. +- [ui] Fixed the global auto-materialize tick timeline when paginating.
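As an illustration of the new Fivetran APIs listed above, here is a minimal, hedged sketch of the `@fivetran_assets` decorator used together with `FivetranWorkspace.sync_and_poll(...)`. The connector ID and environment variable names are placeholders, and exact argument names may differ slightly from this release:

```python
from dagster import AssetExecutionContext, EnvVar
from dagster_fivetran import FivetranWorkspace, fivetran_assets

# Placeholder credentials -- supply your own Fivetran account ID, API key, and API secret.
fivetran_workspace = FivetranWorkspace(
    account_id=EnvVar("FIVETRAN_ACCOUNT_ID"),
    api_key=EnvVar("FIVETRAN_API_KEY"),
    api_secret=EnvVar("FIVETRAN_API_SECRET"),
)

@fivetran_assets(
    connector_id="my_connector_id",  # placeholder connector ID
    workspace=fivetran_workspace,
)
def my_fivetran_connector_assets(context: AssetExecutionContext, fivetran: FivetranWorkspace):
    # Kick off a sync for the connector and stream materialization events
    # back to Dagster as the tables land.
    yield from fivetran.sync_and_poll(context=context)
```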
+ ## 1.9.4 (core) / 0.25.4 (libraries) ### New diff --git a/docs/content/_navigation.json b/docs/content/_navigation.json index 1186bd69ff960..a784329e9fc60 100644 --- a/docs/content/_navigation.json +++ b/docs/content/_navigation.json @@ -1341,10 +1341,6 @@ { "title": "Migrating from Airflow", "path": "/guides/migrations/migrating-airflow-to-dagster" - }, - { - "title": "Observe your Airflow pipelines with Dagster", - "path": "/guides/migrations/observe-your-airflow-pipelines-with-dagster" } ] }, diff --git a/docs/content/api/modules.json.gz b/docs/content/api/modules.json.gz index babd11583e374..00caf49d509e0 100644 Binary files a/docs/content/api/modules.json.gz and b/docs/content/api/modules.json.gz differ diff --git a/docs/content/api/searchindex.json.gz b/docs/content/api/searchindex.json.gz index 5ca4d157c09eb..39b9eab48e8bb 100644 Binary files a/docs/content/api/searchindex.json.gz and b/docs/content/api/searchindex.json.gz differ diff --git a/docs/content/api/sections.json.gz b/docs/content/api/sections.json.gz index 1254495534a4f..c970656e88c06 100644 Binary files a/docs/content/api/sections.json.gz and b/docs/content/api/sections.json.gz differ diff --git a/docs/content/concepts/metadata-tags/kind-tags.mdx b/docs/content/concepts/metadata-tags/kind-tags.mdx index 3125a021b0c6e..d6bd0634af33b 100644 --- a/docs/content/concepts/metadata-tags/kind-tags.mdx +++ b/docs/content/concepts/metadata-tags/kind-tags.mdx @@ -124,6 +124,7 @@ Some kinds are given a branded icon in the UI. We currently support nearly 200 u | `go` | | | `google` | | | `googlecloud` | | +| `googledrive` | | | `googlesheets` | | | `graphql` | | | `greatexpectations` | | diff --git a/docs/content/dagster-plus/deployment/azure/acr-user-code.mdx b/docs/content/dagster-plus/deployment/azure/acr-user-code.mdx index b9922423bce14..93d7390bb3890 100644 --- a/docs/content/dagster-plus/deployment/azure/acr-user-code.mdx +++ b/docs/content/dagster-plus/deployment/azure/acr-user-code.mdx @@ -89,18 +89,26 @@ First, we'll need to generate a service principal for GitHub Actions to use to a az ad sp create-for-rbac --name "github-actions-acr" --role contributor --scopes /subscriptions//resourceGroups//providers/Microsoft.ContainerRegistry/registries/ ``` -This command will output a JSON object with the service principal details. Make sure to save the `appId`, `password`, and `tenant` values - we'll use them in the next step. +This command will output a JSON object with the service principal details. Make sure to save the `appId` and `password` values - we'll use them in the next step. ### Add secrets to your repository We'll add the service principal details as secrets in our repository. Go to your repository in GitHub, and navigate to `Settings` -> `Secrets`. Add the following secrets: +- `DAGSTER_CLOUD_API_TOKEN`: An agent token. For more details see [Managing agent tokens](/dagster-plus/account/managing-user-agent-tokens#managing-agent-tokens). - `AZURE_CLIENT_ID`: The `appId` from the service principal JSON object. - `AZURE_CLIENT_SECRET`: The `password` from the service principal JSON object. -### Update the workflow +### Update the GitHub Actions workflow -Finally, we'll update the workflow to use the service principal details. Open `.github/workflows/dagster-cloud-deploy.yml` in your repository, and uncomment the section on Azure Container Registry. It should look like this: +For this step, open `.github/workflows/dagster-cloud-deploy.yml` in your repository with your preferred text editor to perform the changes below. 
+ +In the `env` section of the workflow, update the following variables: + +- `DAGSTER_CLOUD_ORGANIZATION`: The name of your Dagster Cloud organization. +- `IMAGE_REGISTRY`: The URL of your Azure Container Registry: `.azurecr.io`. + +We'll update the workflow to use the Azure Container Registry by uncommenting its section and providing the principal details. It should look like this: ```yaml # Azure Container Registry (ACR) @@ -114,6 +122,34 @@ Finally, we'll update the workflow to use the service principal details. Open `. password: ${{ secrets.AZURE_CLIENT_SECRET }} ``` +Finally, update the tags in the "Build and upload Docker image" step to match the full URL of your image in ACR: + +```yaml +- name: Build and upload Docker image for "quickstart_etl" + if: steps.prerun.outputs.result != 'skip' + uses: docker/build-push-action@v4 + with: + context: . + push: true + tags: ${{ env.IMAGE_REGISTRY }}/:${{ env.IMAGE_TAG }} + cache-from: type=gha + cache-to: type=gha,mode=max +``` + +### Update the `dagster_cloud.yaml` build configuration to use the Azure Container Registry + +Edit the `dagster_cloud.yaml` file in the root of your repository. Update the `build` section to use the Azure Container Registry, and provide an image name specific to the code location. This must match the registry and image name used in the previous step. + +```yaml +locations: + - location_name: quickstart_etl + code_source: + package_name: quickstart_etl.definitions + build: + directory: ./ + registry: .azurecr.io/ +``` + ### Push and run the workflow Now, commit and push the changes to your repository. The GitHub Actions workflow should run automatically. You can check the status of the workflow in the `Actions` tab of your repository. @@ -133,3 +169,7 @@ alt="Dagster+ code locations page showing the new code location" width={1152} height={320} /> + +## Next steps + +Now that you have your code location deployed, you can follow the guide [here](/dagster-plus/deployment/azure/blob-compute-logs) to set up logging in your AKS cluster. diff --git a/docs/content/dagster-plus/deployment/azure/blob-compute-logs.mdx b/docs/content/dagster-plus/deployment/azure/blob-compute-logs.mdx index 698d8def57482..98d80e90aac2f 100644 --- a/docs/content/dagster-plus/deployment/azure/blob-compute-logs.mdx +++ b/docs/content/dagster-plus/deployment/azure/blob-compute-logs.mdx @@ -25,14 +25,19 @@ First, we'll enable the cluster to use workload identity. This will allow the AK az aks update --resource-group --name --enable-workload-identity ``` -Then, we'll create a new managed identity for the AKS agent, and a new service account in our AKS cluster. +Then, we'll create a new managed identity for the AKS agent. ```bash az identity create --resource-group --name agent-identity -kubectl create serviceaccount dagster-agent-service-account --namespace dagster-agent ``` -Now we need to federate the managed identity with the service account. +We will need to find the name of the service account used by the Dagster+ Agent. If you used the [Dagster+ Helm chart](/dagster-plus/deployment/agents/kubernetes/configuring-running-kubernetes-agent), it should be `user-cloud-dagster-cloud-agent`. You can confirm by using this command: + +```bash +kubectl get serviceaccount -n +``` + +Now we need to federate the managed identity with the service account used by the Dagster+ Agent. 
```bash az identity federated-credential create \ @@ -40,51 +45,63 @@ az identity federated-credential create \ --identity-name agent-identity \ --resource-group \ --issuer $(az aks show -g -n --query "oidcIssuerProfile.issuerUrl" -otsv) \ - --subject system:serviceaccount:dagster-agent:dagster-agent-service-account + --subject system:serviceaccount:: ``` -Finally, we'll edit our AKS agent deployment to use the new service account. +You will need to obtain the client id of this identity for the next few operations. Make sure to save this value: ```bash -kubectl edit deployment -n dagster-agent +az identity show -g -n agent-identity --query 'clientId' -otsv ``` -In the deployment manifest, add the following lines: +We need to grant access to the storage account. + +```bash +az role assignment create \ + --assignee \ + --role "Storage Blob Data Contributor" \ + --scope $(az storage account show -g -n --query 'id' -otsv) +``` + +You will need to add new annotations and labels in Kubernetes to enable the use of workload identities. If you're using the Dagster+ Helm Chart, modify your values.yaml to add the following lines: ```yaml -metadata: - ... +serviceAccount: + annotations: + azure.workload.identity/client-id: "" + +dagsterCloudAgent: + labels: + azure.workload.identity/use: "true" + +workspace: labels: - ... azure.workload.identity/use: "true" -spec: - ... - template: - ... - spec: - ... - serviceAccountName: dagster-agent-sa ``` -If everything is set up correctly, you should be able to run the following command and see an access token returned: + + If you need to retrieve the values used by your Helm deployment, you can run: + `helm get values user-cloud > values.yaml`. + + +Finally, update your Helm release with the new values: ```bash -kubectl exec -n dagster-agent -it -- bash -# in the pod -curl -H "Metadata:true" "http://169.254.169.254/metadata/identity/oauth2/token?resource=https://storage.azure.com/" +helm upgrade user-cloud dagster-cloud/dagster-cloud-agent -n -f values.yaml ``` -## Step 2: Configure Dagster to use Azure Blob Storage - -Now, you need to update the helm values to use Azure Blob Storage for logs. You can do this by editing the `values.yaml` file for your user-cloud deployment. - -Pull down the current values for your deployment: +If everything is set up correctly, you should be able to run the following command and see an access token returned: ```bash -helm get values user-cloud > current-values.yaml +kubectl exec -n -it -- bash +# in the pod +apt update && apt install -y curl # install curl if missing, may vary depending on the base image +curl -H "Metadata:true" "http://169.254.169.254/metadata/identity/oauth2/token?resource=https://storage.azure.com/&api-version=2018-02-01" ``` -Then, edit the `current-values.yaml` file to include the following lines: +## Step 2: Configure Dagster to use Azure Blob Storage + +Once again, you need to update the Helm values to use Azure Blob Storage for logs. 
You can do this by editing the `values.yaml` file for your user-cloud deployment to include the following lines: ```yaml computeLogs: @@ -97,7 +114,7 @@ computeLogs: container: mycontainer default_azure_credential: exclude_environment_credential: false - prefix: dagster-logs- + prefix: dagster-logs local_dir: "/tmp/cool" upload_interval: 30 ``` @@ -105,10 +122,14 @@ computeLogs: Finally, update your deployment with the new values: ```bash -helm upgrade user-cloud dagster-cloud/dagster-cloud-agent -n dagster-agent -f current-values.yaml +helm upgrade user-cloud dagster-cloud/dagster-cloud-agent -n -f values.yaml ``` -## Step 3: Verify logs are being written to Azure Blob Storage +## Step 3: Update your code location to enable the use of the AzureBlobComputeLogManager + +- Add `dagster-azure` to your `setup.py` file. This will allow you to import the `AzureBlobComputeLogManager` class. + +## Step 4: Verify logs are being written to Azure Blob Storage It's time to kick off a run in Dagster to test your new configuration. If following along with the quickstart repo, you should be able to kick off a run of the `all_assets_job`, which will generate logs for you to test against. Otherwise, use any job that emits logs. When you go to the stdout/stderr window of the run page, you should see a log file that directs you to the Azure Blob Storage container. diff --git a/docs/content/deployment/run-monitoring.mdx b/docs/content/deployment/run-monitoring.mdx index 5bafc1acdd697..6673fed36651e 100644 --- a/docs/content/deployment/run-monitoring.mdx +++ b/docs/content/deployment/run-monitoring.mdx @@ -39,7 +39,7 @@ When Dagster terminates a run, the run moves into CANCELING status and sends a t ## General run timeouts -After a run is marked as STARTED, it may hang indefinitely for various reasons (user API errors, network issues, etc.). You can configure a maximum runtime for every run in a deployment by setting the `run_monitoring.max_runtime_seconds` field in your dagster.yaml or (Dagster+ deployment settings)\[dagster-plus/managing-deployments/deployment-settings-reference] to the maximum runtime in seconds. If a run exceeds this timeout and run monitoring is enabled, it will be marked as failed. The `dagster/max_runtime` tag can also be used to set a timeout in seconds on a per-run basis. +After a run is marked as STARTED, it may hang indefinitely for various reasons (user API errors, network issues, etc.). You can configure a maximum runtime for every run in a deployment by setting the `run_monitoring.max_runtime_seconds` field in your dagster.yaml or [Dagster+ deployment settings](/dagster-plus/managing-deployments/deployment-settings-reference) to the maximum runtime in seconds. If a run exceeds this timeout and run monitoring is enabled, it will be marked as failed. The `dagster/max_runtime` tag can also be used to set a timeout in seconds on a per-run basis. For example, to configure a maximum of 2 hours for every run in your deployment: diff --git a/docs/content/guides/migrations.mdx b/docs/content/guides/migrations.mdx index ece1d40fe8b3a..f968e1aa1dc9b 100644 --- a/docs/content/guides/migrations.mdx +++ b/docs/content/guides/migrations.mdx @@ -13,4 +13,3 @@ Explore your options for migrating from other platforms to Dagster. Curious how you can migrate your Airflow pipelines to Dagster? 
- Learn how to perform [a lift-and-shift migration of Airflow to Dagster](/guides/migrations/migrating-airflow-to-dagster) -- Learn how to leverage the features of [Dagster and Airflow together using Dagster Pipes](/guides/migrations/observe-your-airflow-pipelines-with-dagster) diff --git a/docs/content/guides/migrations/observe-your-airflow-pipelines-with-dagster.mdx b/docs/content/guides/migrations/observe-your-airflow-pipelines-with-dagster.mdx deleted file mode 100644 index ed2f0fe3cbfc6..0000000000000 --- a/docs/content/guides/migrations/observe-your-airflow-pipelines-with-dagster.mdx +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: "Observe your Airflow pipelines with Dagster | Dagster Docs" -description: "Learn how to leverage the features of Dagster and Airflow together." ---- - -# Observe your Airflow pipelines with Dagster - -Dagster can act as a single entry point to all orchestration platforms in use at your organization. By injecting a small amount of code into your existing pipelines, you can report events to Dagster, where you can then visualize the full lineage of pipelines. This can be particularly useful if you have multiple Apache Airflow environments, and hope to build a catalog and observation platform through Dagster. - -## Emitting materialization events from Airflow to Dagster - -Imagine you have a large number of pipelines written in Apache Airflow and wish to introduce Dagster into your stack. By using custom Airflow operators, you can continue to run your existing pipelines while you work toward migrating them off Airflow, or while building new pipelines in Dagster that are tightly integrated with your legacy systems. - -To do this, we will define a `DagsterAssetOperator` operator downstream of your Airflow DAG to indicate that the pipeline's processing has concluded. The HTTP endpoint of the Dagster server, the `asset_key`, and additional metadata and descriptions are to be specified to inform Dagster of the materialization. - -```python -from typing import Dict, Optional - -from airflow.models import BaseOperator -from airflow.utils.decorators import apply_defaults -import requests - -class DagsterAssetOperator(BaseOperator): - @apply_defaults - def __init__( - self, - dagster_webserver_host: str, - dagster_webserver_port: str, - asset_key: str, - metadata: Optional[Dict] = None, - description: Optional[str] = None, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.dagster_webserver_host = dagster_webserver_host - self.dagster_webserver_port = dagster_webserver_port - self.asset_key = asset_key - self.metadata = metadata or {} - self.description = description - - def execute(self, context): - url = f"http://{dagster_webserver_host}:{dagster_webserver_port}/report_asset_materialization/{self.asset_key}" - payload = {"metadata": self.metadata, "description": self.description} - headers = {"Content-Type": "application/json"} - - response = requests.post(url, json=payload, headers=headers) - response.raise_for_status() - - self.log.info( - f"Reported asset materialization to Dagster. Response: {response.text}" - ) -``` - -Then, we can append this to our Airflow DAG to indicate that a pipeline has run successfully. 
- -```python -import os - -dagster_webserver_host = os.environ.get("DAGSTER_WEBSERVER_HOST", "localhost") -dagster_webserver_port = os.environ.get("DAGSTER_WEBSERVER_PORT", "3000") - -dagster_op = DagsterAssetOperator( - task_id="report_dagster_asset_materialization", - dagster_webserver_host=dagster_webserver_host, - dagster_webserver_port=dagster_webserver_port, - asset_key="example_external_airflow_asset", - metadata={"airflow/tag": "example", "source": "external"}, -) -``` - -Once the events are emitted from Airflow, there are two options for scheduling Dagster materializations following the external Airflow materialization event: asset sensors and auto materialization policies. - -An external asset is created in Dagster, and an `asset_sensor` is used to identify the materialization events that are being sent from Airflow. - -```python -from dagster import external_asset_from_spec - -example_external_airflow_asset = external_asset_from_spec( - AssetSpec("example_external_airflow_asset", - group_name="External") -) -``` - -```python -from dagster import ( - AssetKey, - EventLogEntry, - RunRequest, - SensorEvaluationContext, - asset_sensor -) - -@asset_sensor( - asset_key=AssetKey("example_external_airflow_asset"), - job=example_external_airflow_asset_job -) -def example_external_airflow_asset_sensor( - context: SensorEvaluationContext, asset_event: EventLogEntry -): - assert asset_event.dagster_event and asset_event.dagster_event.asset_key - yield RunRequest(run_key=context.cursor) -``` - -Now, when a materialization event occurs on the external `example_external_airflow_asset` asset, the `example_external_airflow_asset_job` job will be triggered. Here, you can define logic that can build upon the DAG from your Airflow environment. diff --git a/docs/content/integrations/airflow.mdx b/docs/content/integrations/airflow.mdx index 41d0d0c53ab73..66a46614286fb 100644 --- a/docs/content/integrations/airflow.mdx +++ b/docs/content/integrations/airflow.mdx @@ -9,7 +9,6 @@ Migrating from Airflow to Dagster, or integrating Dagster into your existing wor - [Learning Dagster from Airflow](/integrations/airflow/from-airflow-to-dagster) - a step-by-step tutorial of mapping concepts from Airflow to Dagster - [Migrating from Airflow](/guides/migrations/migrating-airflow-to-dagster) - migration patterns for translating Airflow code to Dagster -- [Observe your Airflow pipelines with Dagster](/guides/migrations/observe-your-airflow-pipelines-with-dagster) - See how Dagster can act as the observation layer over all pipelines in your organization --- diff --git a/docs/content/integrations/fivetran/fivetran.mdx b/docs/content/integrations/fivetran/fivetran.mdx index cd33c2331e76e..2c61ea9917b23 100644 --- a/docs/content/integrations/fivetran/fivetran.mdx +++ b/docs/content/integrations/fivetran/fivetran.mdx @@ -26,6 +26,14 @@ This guide provides instructions for using Dagster with Fivetran using the `dags +## Set up your environment + +To get started, you'll need to install the `dagster` and `dagster-fivetran` Python packages: + +```bash +pip install dagster dagster-fivetran +``` + ## Represent Fivetran assets in the asset graph To load Fivetran assets into the Dagster asset graph, you must first construct a resource, which allows Dagster to communicate with your Fivetran workspace. You'll need to supply your account ID, API key and API secret. 
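As a rough sketch (assuming the `FivetranWorkspace` resource and `load_fivetran_asset_specs` from `dagster_fivetran`, and environment variable names of your choosing for the credentials), constructing the resource looks something like this:

```python
from dagster import EnvVar
from dagster_fivetran import FivetranWorkspace, load_fivetran_asset_specs

# The environment variable names here are placeholders -- use whichever names hold your credentials.
fivetran_workspace = FivetranWorkspace(
    account_id=EnvVar("FIVETRAN_ACCOUNT_ID"),
    api_key=EnvVar("FIVETRAN_API_KEY"),
    api_secret=EnvVar("FIVETRAN_API_SECRET"),
)

# Load the tables exposed by the workspace's connectors as asset specs.
fivetran_specs = load_fivetran_asset_specs(fivetran_workspace)
```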
See [Getting Started](https://fivetran.com/docs/rest-api/getting-started) in the Fivetran REST API documentation for more information on how to create your API key and API secret. diff --git a/docs/dagster-university/next-env.d.ts b/docs/dagster-university/next-env.d.ts index 4f11a03dc6cc3..a4a7b3f5cfa2f 100644 --- a/docs/dagster-university/next-env.d.ts +++ b/docs/dagster-university/next-env.d.ts @@ -2,4 +2,4 @@ /// // NOTE: This file should not be edited -// see https://nextjs.org/docs/basic-features/typescript for more information. +// see https://nextjs.org/docs/pages/building-your-application/configuring/typescript for more information. diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-2/requirements-and-installation.md b/docs/dagster-university/pages/dagster-essentials/lesson-2/requirements-and-installation.md index 363718ca770fe..b074d2757ae49 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-2/requirements-and-installation.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-2/requirements-and-installation.md @@ -9,11 +9,12 @@ lesson: '2' To install Dagster, you’ll need: - **To install Python**. Dagster supports Python 3.9 through 3.12. -- **A package manager like pip or poetry**. If you need to install a package manager, refer to the following installation guides: +- **A package manager like pip, Poetry, or uv**. If you need to install a package manager, refer to the following installation guides: - [pip](https://pip.pypa.io/en/stable/installation/) - [Poetry](https://python-poetry.org/docs/) + - [uv](https://docs.astral.sh/uv/getting-started/installation/) -To check that Python and the pip or Poetry package manager are already installed in your environment, run: +To check that Python and the package manager are already installed in your environment, run: ```shell python --version diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-3/whats-an-asset.md b/docs/dagster-university/pages/dagster-essentials/lesson-3/whats-an-asset.md index 7e3d27ebd2cee..99499c844f619 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-3/whats-an-asset.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-3/whats-an-asset.md @@ -10,8 +10,8 @@ An asset is an object in persistent storage that captures some understanding of - **A database table or view**, such as those in a Google BigQuery data warehouse - **A file**, such as a file in your local machine or blob storage like Amazon S3 -- **A machine learning model** -- **An asset from an integration,** like a dbt model or a Fivetran connector +- **A machine learning model**, such as TensorFlow or PyTorch +- **An asset from an integration,** such as a dbt model or a Fivetran connector Assets aren’t limited to just the objects listed above - these are just some common examples. diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md index 92eab5b00a8b5..69f9139b16c2b 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md @@ -30,7 +30,7 @@ The asset you built should look similar to the following code. 
Click **View answ deps=["taxi_zones_file"] ) def taxi_zones() -> None: - sql_query = f""" + query = f""" create or replace table zones as ( select LocationID as zone_id, @@ -41,6 +41,13 @@ def taxi_zones() -> None: ); """ - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) - conn.execute(sql_query) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) + conn.execute(query) ``` diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md index 8067d27dd281a..b5c5e90b1bfad 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md @@ -62,12 +62,20 @@ from datetime import datetime, timedelta from . import constants import pandas as pd +from dagster._utils.backoff import backoff @asset( deps=["taxi_trips"] ) def trips_by_week() -> None: - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) current_date = datetime.strptime("2023-03-01", constants.DATE_FORMAT) end_date = datetime.strptime("2023-04-01", constants.DATE_FORMAT) diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md b/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md index 0bf9ced624484..f5bc537d9837b 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md @@ -13,6 +13,7 @@ Now that you have a query that produces an asset, let’s use Dagster to manage ```python import duckdb import os + from dagster._utils.backoff import backoff ``` 2. Copy and paste the code below into the bottom of the `trips.py` file. Note how this code looks similar to the asset definition code for the `taxi_trips_file` and the `taxi_zones` assets: @@ -25,7 +26,7 @@ Now that you have a query that produces an asset, let’s use Dagster to manage """ The raw taxi trips dataset, loaded into a DuckDB database """ - sql_query = """ + query = """ create or replace table trips as ( select VendorID as vendor_id, @@ -42,8 +43,15 @@ Now that you have a query that produces an asset, let’s use Dagster to manage ); """ - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) - conn.execute(sql_query) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) + conn.execute(query) ``` Let’s walk through what this code does: @@ -52,13 +60,13 @@ Now that you have a query that produces an asset, let’s use Dagster to manage 2. The `taxi_trips_file` asset is defined as a dependency of `taxi_trips` through the `deps` argument. - 3. Next, a variable named `sql_query` is created. This variable contains a SQL query that creates a table named `trips`, which sources its data from the `data/raw/taxi_trips_2023-03.parquet` file. This is the file created by the `taxi_trips_file` asset. + 3. Next, a variable named `query` is created. 
This variable contains a SQL query that creates a table named `trips`, which sources its data from the `data/raw/taxi_trips_2023-03.parquet` file. This is the file created by the `taxi_trips_file` asset. - 4. A variable named `conn` is created, which defines the connection to the DuckDB database in the project. To do this, it uses the `.connect` method from the `duckdb` library, passing in the `DUCKDB_DATABASE` environment variable to tell DuckDB where the database is located. + 4. A variable named `conn` is created, which defines the connection to the DuckDB database in the project. To do this, we wrap the call to DuckDB's `.connect` method with the Dagster utility function `backoff`. Using the `backoff` function allows multiple assets to use DuckDB safely by retrying the connection when the database file is locked by another process. The `backoff` function takes in the function we want to call (in this case the `.connect` method from the `duckdb` library), any errors to retry on (`RuntimeError` and `duckdb.IOException`), the maximum number of retries, and finally, the keyword arguments to supply to the `.connect` DuckDB method. Here we are passing in the `DUCKDB_DATABASE` environment variable to tell DuckDB where the database is located. The `DUCKDB_DATABASE` environment variable, sourced from your project’s `.env` file, resolves to `data/staging/data.duckdb`. **Note**: We set up this file in Lesson 2 - refer to this lesson if you need a refresher. If this file isn’t set up correctly, the materialization will result in an error. - 5. Finally, `conn` is paired with the DuckDB `execute` method, where our SQL query (`sql_query`) is passed in as an argument. This tells the asset that, when materializing, to connect to the DuckDB database and execute the query in `sql_query`. + 5. Finally, `conn` is paired with the DuckDB `execute` method, where our SQL query (`query`) is passed in as an argument. This tells the asset to connect to the DuckDB database and execute the query in `query` when materializing. 3. Save the changes to the file. @@ -98,9 +106,9 @@ This is because you’ve told Dagster that taxi_trips depends on the taxi_trips_ To confirm that the `taxi_trips` asset materialized properly, you can access the newly made `trips` table in DuckDB. In a new terminal session, open a Python REPL and run the following snippet: ```python -> import duckdb -> conn = duckdb.connect(database="data/staging/data.duckdb") # assumes you're writing to the same destination as specified in .env.example -> conn.execute("select count(*) from trips").fetchall() +import duckdb +conn = duckdb.connect(database="data/staging/data.duckdb") # assumes you're writing to the same destination as specified in .env.example +conn.execute("select count(*) from trips").fetchall() ``` The command should succeed and return a row count of the taxi trips that were ingested. When finished, make sure to stop the terminal process before continuing or you may encounter an error. Use `Control+C` or `Command+C` to stop the process. diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md b/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md index 311748941755c..9dadbf149bfa1 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md @@ -14,7 +14,14 @@ Throughout this module, you’ve used DuckDB to store and transform your data. E ) def taxi_trips() -> None: ...
- conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) ... ``` diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md b/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md index 7e27a7e555ab1..6f059f62f2bdc 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md @@ -31,7 +31,7 @@ from dagster import asset deps=["taxi_trips_file"], ) def taxi_trips() -> None: - sql_query = """ + query = """ create or replace table taxi_trips as ( select VendorID as vendor_id, @@ -48,8 +48,15 @@ def taxi_trips() -> None: ); """ - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) - conn.execute(sql_query) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) + conn.execute(query) ``` --- @@ -72,7 +79,7 @@ from dagster import asset deps=["taxi_trips_file"], ) def taxi_trips(database: DuckDBResource) -> None: - sql_query = """ + query = """ create or replace table taxi_trips as ( select VendorID as vendor_id, @@ -90,7 +97,7 @@ def taxi_trips(database: DuckDBResource) -> None: """ with database.get_connection() as conn: - conn.execute(sql_query) + conn.execute(query) ``` To refactor `taxi_trips` to use the `database` resource, we had to: @@ -100,7 +107,14 @@ To refactor `taxi_trips` to use the `database` resource, we had to: 3. Replace the lines that connect to DuckDB and execute a query: ```python - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) conn.execute(query) ``` @@ -111,6 +125,8 @@ To refactor `taxi_trips` to use the `database` resource, we had to: conn.execute(query) ``` + Notice that we no longer need to use the `backoff` function. The Dagster `DuckDBResource` handles this functionality for us. 
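For reference, a minimal sketch of how a `database` resource like this can be registered with the project's `Definitions` object (this assumes the resource key `database` and the `DUCKDB_DATABASE` environment variable used throughout this lesson; the course's own setup may differ slightly):

```python
from dagster import Definitions, EnvVar
from dagster_duckdb import DuckDBResource

# The key "database" must match the parameter name the asset declares,
# e.g. `def taxi_trips(database: DuckDBResource)`.
defs = Definitions(
    resources={
        "database": DuckDBResource(database=EnvVar("DUCKDB_DATABASE")),
    },
)
```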
+ --- ## Before you continue diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-7/what-are-schedules.md b/docs/dagster-university/pages/dagster-essentials/lesson-7/what-are-schedules.md index 7abd4f66c4963..f91b4cd4900f0 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-7/what-are-schedules.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-7/what-are-schedules.md @@ -92,7 +92,7 @@ Despite many schedulers and orchestrators replacing the cron program since then, Consider the following example: -```python +``` 15 5 * * 1-5 ``` diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-8/adding-partitions-to-assets.md b/docs/dagster-university/pages/dagster-essentials/lesson-8/adding-partitions-to-assets.md index cdfdd00cf041e..c44c2b93eafc9 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-8/adding-partitions-to-assets.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-8/adding-partitions-to-assets.md @@ -63,7 +63,7 @@ To add the partition to the asset: @asset( partitions_def=monthly_partition ) - def taxi_trips_file(context) -> None: + def taxi_trips_file(context: AssetExecutionContext) -> None: partition_date_str = context.partition_key ``` @@ -73,7 +73,7 @@ To add the partition to the asset: @asset( partitions_def=monthly_partition ) - def taxi_trips_file(context) -> None: + def taxi_trips_file(context: AssetExecutionContext) -> None: partition_date_str = context.partition_key month_to_fetch = partition_date_str[:-3] ``` @@ -86,7 +86,7 @@ from ..partitions import monthly_partition @asset( partitions_def=monthly_partition ) -def taxi_trips_file(context) -> None: +def taxi_trips_file(context: AssetExecutionContext) -> None: """ The raw parquet files for the taxi trips dataset. Sourced from the NYC Open Data portal. """ diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-8/coding-practice-partition-taxi-trips.md b/docs/dagster-university/pages/dagster-essentials/lesson-8/coding-practice-partition-taxi-trips.md index f2aa0b5ffc093..606ee4b3847c0 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-8/coding-practice-partition-taxi-trips.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-8/coding-practice-partition-taxi-trips.md @@ -17,7 +17,7 @@ To practice what you’ve learned, partition the `taxi_trips` asset by month usi {% callout %} You’ll need to drop the existing `taxi_trips` because of the new `partition_date` column. 
In a Python REPL or scratch script, run the following: - ```yaml + ``` import duckdb conn = duckdb.connect(database="data/staging/data.duckdb") conn.execute("drop table trips;") diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-8/creating-a-schedule-with-a-date-based-partition.md b/docs/dagster-university/pages/dagster-essentials/lesson-8/creating-a-schedule-with-a-date-based-partition.md index a6327cb6ad954..cd8793b6755fe 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-8/creating-a-schedule-with-a-date-based-partition.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-8/creating-a-schedule-with-a-date-based-partition.md @@ -36,12 +36,14 @@ To add partition to the job, make the following changes: The job should now look like this: ```python -from dagster import define_asset_job, AssetSelection, AssetKey +from dagster import define_asset_job, AssetSelection from ..partitions import monthly_partition +trips_by_week = AssetSelection.assets("trips_by_week") + trip_update_job = define_asset_job( name="trip_update_job", partitions_def=monthly_partition, # partitions added here - selection=AssetSelection.all() - AssetSelection.assets(["trips_by_week"]) + selection=AssetSelection.all() - trips_by_week ) ``` diff --git a/docs/docs-beta/CONTRIBUTING.md b/docs/docs-beta/CONTRIBUTING.md new file mode 100644 index 0000000000000..c0b8e52ba5712 --- /dev/null +++ b/docs/docs-beta/CONTRIBUTING.md @@ -0,0 +1,142 @@ +# Contributing + +## Migration from legacy docs + +There are some features in the previous docs that require changes to be made to work in the new Docusaurus-based documentation site. + +### Images + +Before: + +``` +Highlighted Redeploy option in the dropdown menu next to a code location in Dagster+ +``` + +After: + +``` + +``` + +### Notes + +Before: + +``` +This guide is applicable to Dagster+. +``` + +After: + +``` +:::note +This guide is applicable to Dagster+ +::: +``` + +### Tabs + +Before: + +``` + + + ... + + +``` + +After: + +``` + + + ... + + +``` + +### Header boundaries + +Previously, horizontal rules had to be defined between each level-two header: `---`. + +This is no longer required, as the horizontal rule has been included in the CSS rules. + +### Reference tables + +Before: + +``` + + + The name of the Dagster+ deployment. For example, prod. + + + If 1, the deployment is a{" "} + + branch deployment + + . Refer to the + Branch Deployment variables section + for a list of variables available in branch deployments. + + +``` + +After: + +| Key | Value | +|---|---| +| `DAGSTER_CLOUD_DEPLOYMENT_NAME` | The name of the Dagster+ deployment.
**Example:** `prod`. | +| `DAGSTER_CLOUD_IS_BRANCH_DEPLOYMENT` | `1` if the deployment is a [branch deployment](/dagster-plus/features/ci-cd/branch-deployments/index.md). | + +### Whitespace via `{" "}` + +Forcing empty space using the `{" "}` interpolation is not supported, and must be removed. + +--- + +## Diagrams + +You can use [Mermaid.js](https://mermaid.js.org/syntax/flowchart.html) to create diagrams. For example: + +```mermaid +flowchart LR + Start --> Stop +``` + +Refer to the [Mermaid.js documentation](https://mermaid.js.org/) for more info. + +--- + +## Code examples + +To include code snippets, use the following format: + +``` + +``` + +The `filePath` is relative to the `./examples/docs_beta_snippets/docs_beta_snippets/` directory. + +At minimum, all `.py` files in the `docs_beta_snippets` directory are tested by attempting to load the Python files. +You can write additional tests for them in the `docs_beta_snippets_test` folder. See the folder for more information. + +To type-check the code snippets during development, run the following command from the Dagster root folder. +This will run `pyright` on all new/changed files relative to the master branch. + +``` +make quick_pyright +``` diff --git a/docs/docs-beta/README.md b/docs/docs-beta/README.md index 33e2e5fbf0ae8..f07c8ddd61daa 100644 --- a/docs/docs-beta/README.md +++ b/docs/docs-beta/README.md @@ -5,6 +5,26 @@ The documentation site is built using [Docusaurus](https://docusaurus.io/), a mo --- +## Overview of the docs + +- `./src` contains custom components, styles, themes, and layouts. +- `./content-templates` contains the templates for the documentation pages. +- `./docs/` is the source of truth for the documentation. +- `/examples/docs_beta_snippets/docs_beta_snippets/` contains all code examples for the documentation. + +The docs are broken down into the following sections: + +- Docs - includes content from [getting-started](./docs/getting-started/) and [guides](./docs/guides/) +- [Integrations](./docs/integrations/) +- [Dagster+](./docs/dagster-plus/) +- [API reference](./docs/api/) + +`sidebar.ts` and `docusaurus.config.ts` are the main configuration files for the documentation. + +For formatting guidelines, see the [CONTRIBUTING](CONTRIBUTING.md) guide. + +--- + ## Installation The site uses [yarn](https://yarnpkg.com/) for package management. @@ -31,22 +51,6 @@ pip install vale --- -## Overview of the docs - -- `./src` contains custom components, styles, themes, and layouts. -- `./content-templates` contains the templates for the documentation pages. -- `./docs/` is the source of truth for the documentation. -- `/examples/docs_beta_snippets/docs_beta_snippets/` contains all code examples for the documentation. - -The docs are broken down into the following sections: - -- [Tutorials](./docs/tutorials/) -- [Guides](./docs/guides/) - -`sidebar.ts` and `docusaurus.config.ts` are the main configuration files for the documentation. - ---- - ## Local Development To start the local development server: @@ -72,37 +76,6 @@ yarn vale /path/to/file ## check individual file yarn vale --no-wrap ## remove wrapping from output ``` -### Diagrams - -You can use [Mermaid.js](https://mermaid.js.org/syntax/flowchart.html) to create diagrams. For example: - -```mermaid -flowchart LR - Start --> Stop -``` - -Refer to the [Mermaid.js documentation](https://mermaid.js.org/) for more info. 
- -### Code examples - -To include code snippets, use the following format: - -``` - -``` - -The `filePath` is relative to the `./examples/docs_beta_snippets/docs_beta_snippets/` directory. - -At minimum, all `.py` files in the `docs_beta_snippets` directory are tested by attempting to load the Python files. -You can write additional tests for them in the `docs_beta_snippets_test` folder. See the folder for more information. - -To type-check the code snippets during development, run the following command from the Dagster root folder. -This will run `pyright` on all new/changed files relative to the master branch. - -``` -make quick_pyright -``` - --- ## Build @@ -115,6 +88,8 @@ yarn build This command generates static content into the `build` directory and can be served using any static contents hosting service. This also checks for any broken links in the documentation. Note that you will need to store Algolia credentials in local environment variables to build the site for production. +--- + ## Deployment This site is built and deployed using Vercel. @@ -129,6 +104,8 @@ yarn sync-api-docs && yarn build This runs the `scripts/vercel-sync-api-docs.sh` script which builds the MDX files using the custom `sphinx-mdx-builder`, and copies the resulting MDX files to `docs/api/python-api`. +--- + ## Search Algolia search is used for search results on the website, as configured in `docusaurus.config.ts`. diff --git a/docs/docs-beta/docs/dagster-plus/features/code-locations/code-location-history.md b/docs/docs-beta/docs/dagster-plus/deployment/code-locations/code-location-history.md similarity index 97% rename from docs/docs-beta/docs/dagster-plus/features/code-locations/code-location-history.md rename to docs/docs-beta/docs/dagster-plus/deployment/code-locations/code-location-history.md index bfdaa044ac9ce..a3b242c3b3fb7 100644 --- a/docs/docs-beta/docs/dagster-plus/features/code-locations/code-location-history.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/code-locations/code-location-history.md @@ -49,5 +49,5 @@ If you notice an issue with newly deployed code, or your code fails to deploy su ## Next steps -- Learn more about [Code Locations](/dagster-plus/features/code-locations) +- Learn more about [Code Locations](/dagster-plus/deployment/code-locations) - Learn how to [Alert when a code location fails to load](/dagster-plus/features/alerts/creating-alerts#alerting-when-a-code-location-fails-to-load) diff --git a/docs/docs-beta/docs/dagster-plus/features/code-locations/dagster-cloud-yaml.md b/docs/docs-beta/docs/dagster-plus/deployment/code-locations/dagster-cloud-yaml.md similarity index 100% rename from docs/docs-beta/docs/dagster-plus/features/code-locations/dagster-cloud-yaml.md rename to docs/docs-beta/docs/dagster-plus/deployment/code-locations/dagster-cloud-yaml.md diff --git a/docs/docs-beta/docs/dagster-plus/features/code-locations/index.md b/docs/docs-beta/docs/dagster-plus/deployment/code-locations/index.md similarity index 99% rename from docs/docs-beta/docs/dagster-plus/features/code-locations/index.md rename to docs/docs-beta/docs/dagster-plus/deployment/code-locations/index.md index 946b831d0b34e..facde90edf2aa 100644 --- a/docs/docs-beta/docs/dagster-plus/features/code-locations/index.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/code-locations/index.md @@ -1,6 +1,6 @@ --- title: "Code locations" -sidebar_position: 20 +sidebar_position: 40 --- Separate code locations allow you to deploy different projects that still roll up into a single Dagster+ deployment with 
one global lineage graph. diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/amazon-ecs/index.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/amazon-ecs/index.md index ade5c5b935ffa..0ab0a059c490b 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/amazon-ecs/index.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/amazon-ecs/index.md @@ -1,6 +1,6 @@ --- title: Amazon ECS agent -sidebar_position: 50 +sidebar_position: 30 --- import DocCardList from '@theme/DocCardList'; diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/architecture.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/architecture.md index 7c21d41a38430..8afa1ecb5e76a 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/architecture.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/architecture.md @@ -5,25 +5,13 @@ sidebar_position: 10 The Hybrid architecture is the most flexible and secure way to deploy Dagster+. It allows you to run your user code in your environment while leveraging Dagster+'s infrastructure for orchestration and metadata management -
- Pre-requisites - -Before you begin, you should have: - -- A [Dagster+ account](/dagster-plus/getting-started) -- [Basic familiarity with Dagster](/getting-started/quickstart) - -
- ---- - ## Hybrid architecture overview A **hybrid deployment** utilizes a combination of your infrastructure and Dagster-hosted backend services. -The Dagster backend services - including the web frontend, GraphQL API, metadata database, and daemons (responsible for executing schedules and sensors) - are hosted in Dagster+. You are responsible for running an [agent](/todo) in your environment. +The Dagster backend services - including the web frontend, GraphQL API, metadata database, and daemons (responsible for executing schedules and sensors) - are hosted in Dagster+. You are responsible for running an [agent](index.md#dagster-hybrid-agents) in your environment. -![Dagster+ Hybrid deployment architecture](/img/placeholder.svg) +![Dagster+ Hybrid deployment architecture](/images/dagster-cloud/deployment/hybrid-architecture.png) Work is enqueued for your agent when: @@ -35,27 +23,31 @@ The agent polls the agent API to see if any work needs to be done and launches u All user code runs within your environment, in isolation from Dagster system code. ---- - ## The agent Because the agent communicates with the Dagster+ control plane over the agent API, it's possible to support agents that operate in arbitrary compute environments. This means that over time, Dagster+'s support for different user deployment environments will expand and custom agents can take advantage of bespoke compute environments such as HPC. -Refer to the [Agents documentation](/todo) for more info, including the agents that are currently supported. - ---- +See the [setup page](index.md#dagster-hybrid-agents) for a list of agents that are currently supported. ## Security -This section describes how Dagster+ interacts with user code. To summarize: +Dagster+ Hybrid relies on a shared security model. + +The Dagster+ control plane is SOC 2 Type II certified and follows best practices such as: +- encrypting data at rest (AES 256) and in transit (TLS 1.2+) +- highly available, with disaster recovery and backup strategies +- only manages metadata such as pipeline names, execution status, and run duration + +The execution environment is managed by the customer: +- Dagster+ doesn't have access to user code—your code never leaves your environment. Metadata about the code is fetched over constrained APIs. +- All connections to databases, file systems, and other resources are made from your environment. +- The execution environment only requires egress access to Dagster+. No ingress is required from Dagster+ to user environments. -- No ingress is required from Dagster+ to user environments -- Dagster+ doesn't have access to user code. Metadata about the code is fetched over constrained APIs. 
-- The Dagster+ agent is [open source and auditable](https://github.com/dagster-io/dagster-cloud) +Additionally, the Dagster+ agent is [open source and auditable](https://github.com/dagster-io/dagster-cloud) -These highlights are described in more detail below: +The following highlights are described in more detail below: - [Interactions and queries](#interactions-and-queries) - [Runs](#runs) diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/docker/index.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/docker/index.md index cb00c69f21296..81ebfa807f20a 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/docker/index.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/docker/index.md @@ -1,6 +1,6 @@ --- title: Docker agent -sidebar_position: 30 +sidebar_position: 40 --- import DocCardList from '@theme/DocCardList'; diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/index.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/index.md index f726f15d4a12b..5a4c660304db3 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/index.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/index.md @@ -6,25 +6,26 @@ sidebar_position: 20 In a Dagster+ Hybrid deployment, the orchestration control plane is run by Dagster+ while your Dagster code is executed within your environment. -[comment]: <> (TODO: Architecture diagram) +:::note +For an overview of the Hybrid design, including security considerations, see [Dagster+ Hybrid architecture](architecture.md). +::: ## Get started -To get started with a Hybrid deployment you'll need to: +To get started with a Hybrid deployment, you'll need to: 1. Create a [Dagster+ organization](https://dagster.cloud/signup) -2. Install a Dagster+ Hybrid Agent -3. [Add a code location](/dagster-plus/features/code-locations), typically using a Git repository and CI/CD +2. [Install a Dagster+ Hybrid agent](#dagster-hybrid-agents) +3. [Add a code location](/dagster-plus/deployment/code-locations), typically using a Git repository and CI/CD ## Dagster+ Hybrid agents -The Dagster+ agent is a long-lived process that polls Dagster+'s API servers for new work. +The Dagster+ agent is a long-lived process that polls Dagster+'s API servers for new work. Currently supported agents include: -See the following guides for setting up an agent: - [Kubernetes](/dagster-plus/deployment/deployment-types/hybrid/kubernetes) - [AWS ECS](/dagster-plus/deployment/deployment-types/hybrid/amazon-ecs/new-vpc) - [Docker](/dagster-plus/deployment/deployment-types/hybrid/docker) - - [Locally](/dagster-plus/deployment/deployment-types/hybrid/local) + - [Local agent](/dagster-plus/deployment/deployment-types/hybrid/local) ## What you'll see in your environment @@ -44,20 +45,10 @@ When a run needs to be launched, Dagster+ enqueues instructions for your agent t Your agent will send Dagster+ metadata letting us know the run has been launched. Your run's container will also send Dagster+ metadata informing us of how the run is progressing. The Dagster+ backend services will monitor this stream of metadata to make additional orchestration decisions, monitor for failure, or send alerts. -## Security +## Best practices -Dagster+ hybrid relies on a shared security model. 
+### Security -The Dagster+ control plane is SOC 2 Type II certified and follows best practices such as: -- encrypting data at rest (AES 256) and in transit (TLS 1.2+) -- highly available, with disaster recovery and backup strategies -- only manages metadata such as pipeline names, execution status, and run duration - -The execution environment is managed by the customer: -- your code never leaves your environment -- all connections to databases, file systems, and other resources are made from your environment -- the execution environment only requires egress access to Dagster+ - -Common security considerations in Dagster+ hybrid include: -- [disabling log forwarding](/todo) -- [managing tokens](/todo) +You can do the following to make your Dagster+ Hybrid deployment more secure: +- [Disable log forwarding](/dagster-plus/deployment/management/settings/customizing-agent-settings#disabling-compute-logs) +- [Manage tokens](/dagster-plus/deployment/management/tokens/agent-tokens) diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/index.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/index.md index d744a447c3b87..3b4e765984d84 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/index.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/index.md @@ -1,6 +1,6 @@ --- title: Kubernetes agent -sidebar_position: 40 +sidebar_position: 20 --- import DocCardList from '@theme/DocCardList'; diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/setup.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/setup.md index 69162ce92d7eb..f6cd53f5dbb09 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/setup.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/kubernetes/setup.md @@ -442,7 +442,7 @@ Another option is to launch a pod for each asset by telling Dagster to use the K -Dagster can launch and manage existing Docker images as Kubernetes jobs using the [Dagster kubernetes pipes integration](/integrations/kubernetes). To request resources for these jobs by supplying the appropriate Kubernetes pod spec. +Dagster can launch and manage existing Docker images as Kubernetes jobs using the [Dagster kubernetes pipes integration](/integrations/libraries/kubernetes). To request resources for these jobs by supplying the appropriate Kubernetes pod spec. 
diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/local.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/local.md index 53c43ba09d468..dc199cc03e4a8 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/local.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/hybrid/local.md @@ -1,6 +1,6 @@ --- title: Running a local agent -sidebar_position: 20 +sidebar_position: 50 sidebar_label: Local agent --- diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/ci-cd-in-serverless.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/ci-cd-in-serverless.md index 265a378ea82b0..dc094da1f98fb 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/ci-cd-in-serverless.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/ci-cd-in-serverless.md @@ -83,5 +83,3 @@ dagster-cloud serverless deploy-python-executable ./my-dagster-project \ - ---- diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/index.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/index.md index 02b9ce40ada82..672ded0e54bbc 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/index.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/index.md @@ -8,9 +8,7 @@ sidebar_position: 10 Dagster+ Serverless is a fully managed version of Dagster+ and is the easiest way to get started with Dagster. With a Serverless deployment, you can run your Dagster jobs without spinning up any infrastructure yourself. ---- - -## When to choose Serverless \{#when-to-choose-serverless} +## Serverless vs Hybrid Serverless works best with workloads that primarily orchestrate other services or perform light computation. Most workloads fit into this category, especially those that orchestrate third-party SaaS products like cloud data warehouses and ETL tools. @@ -21,9 +19,7 @@ If any of the following are applicable, you should select [Hybrid deployment](/d - You need to distribute computation across many nodes for a single run. Dagster+ runs currently execute on a single node with 4 CPUs - You don't want to add Dagster Labs as a data processor ---- - -## Limitations \{#limitations} +## Limitations Serverless is subject to the following limitations: @@ -36,8 +32,6 @@ Serverless is subject to the following limitations: Dagster+ Pro customers may request a quota increase by [contacting Sales](https://dagster.io/contact). ---- - ## Next steps -To start using Dagster+ Serverless, follow our [Getting started with Dagster+](/dagster-plus/getting-started) guide. +To start using Dagster+ Serverless, follow the steps in [Getting started with Dagster+](/dagster-plus/getting-started). 
diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/run-isolation.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/run-isolation.md index c9c5d04ab6756..ec45953a7632f 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/run-isolation.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/run-isolation.md @@ -15,8 +15,6 @@ To follow the steps in this guide, you'll need: - An understanding of [Dagster+ deployment settings](/dagster-plus/deployment/management/settings/deployment-settings) ---- - ## Differences between isolated and non-isolated runs - [**Isolated runs**](#isolated-runs-default) execute in their own container. They're the default and are intended for production and compute-heavy use cases. diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/runtime-environment.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/runtime-environment.md index 750eefa59919a..ae13107a69f9b 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/runtime-environment.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/runtime-environment.md @@ -7,13 +7,13 @@ sidebar_position: 100 By default, Dagster+ Serverless will package your code as PEX files and deploys them on Docker images. Using PEX files significantly reduces the time to deploy since it does not require building a new Docker image and provisioning a new container for every code change. However you are able to customize the Serverless runtime environment in various ways: - [Add dependencies](#add-dependencies) -- [Use a different Python version](#python-version) -- [Use a different base image](#base-image) -- [Include data files](#data-files) -- [Disable PEX deploys](#disable-pex) -- [Use private Python packages](#private-packages) +- [Use a different Python version](#use-a-different-python-version) +- [Use a different base image](#use-a-different-base-image) +- [Include data files](#include-data-files) +- [Disable PEX deploys](#disable-pex-deploys) +- [Use private Python packages](#use-private-python-packages) -## Add dependencies \{#add-dependencies} +## Add dependencies You can add dependencies by including the corresponding Python libraries in your Dagster project's `setup.py` file. These should follow [PEP 508](https://peps.python.org/pep-0508/). @@ -39,9 +39,9 @@ setup( ) ``` -To add a package from a private GitHub repository, see: [Use private Python packages](#private-packages) +To add a package from a private GitHub repository, see [Use private Python packages](#use-private-python-packages) -## Use a different Python version \{#python-version} +## Use a different Python version The default Python version for Dagster+ Serverless is Python 3.9. Python versions 3.10 through 3.12 are also supported. You can specify the Python version you want to use in your GitHub or GitLab workflow, or by using the `dagster-cloud` CLI. 
@@ -70,7 +70,7 @@ dagster-cloud serverless deploy-python-executable --python-version=3.11 --locati -## Use a different base image \{#base-image} +## Use a different base image Dagster+ runs your code on a Docker image that we build as follows: @@ -117,7 +117,7 @@ Setting a custom base image isn't supported for GitLab CI/CD workflows out of th -## Include data files \{#data-files} +## Include data files To add data files to your deployment, use the [Data Files Support](https://setuptools.pypa.io/en/latest/userguide/datafiles.html) built into Python's `setup.py`. This requires adding a `package_data` or `include_package_data` keyword in the call to `setup()` in `setup.py`. For example, given this directory structure: @@ -134,7 +134,7 @@ To add data files to your deployment, use the [Data Files Support](https://setup If you want to include the data folder, modify your `setup.py` to add the `package_data` line: -## Disable PEX deploys \{#disable-pex} +## Disable PEX deploys You have the option to disable PEX-based deploys and deploy using a Docker image instead of PEX. You can disable PEX in your GitHub or GitLab workflow, or by using the `dagster-cloud` CLI. @@ -200,7 +200,7 @@ Setting a custom base image isn't supported for GitLab CI/CD workflows out of th -## Use private Python packages \{#private-packages} +## Use private Python packages If you use PEX deploys in your workflow (`ENABLE_FAST_DEPLOYS: 'true'`), the following steps can install a package from a private GitHub repository, e.g. `my-org/private-repo`, as a dependency: diff --git a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/security.md b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/security.md index bd1b23f2e1c6e..1996c5fd07abf 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/security.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/deployment-types/serverless/security.md @@ -32,8 +32,6 @@ To prevent this, you can use [another I/O manager](/guides/build/configure/io-ma You must have [boto3](https://pypi.org/project/boto3/) or `dagster-cloud[serverless]` installed as a project dependency otherwise the Dagster+ managed storage can fail and silently fall back to using the default I/O manager. ::: -## Adding environment variables and secrets \{#adding-secrets} +## Adding environment variables and secrets Often you'll need to securely access secrets from your jobs. Dagster+ supports several methods for adding secrets—refer to the [Dagster+ environment variables documentation](/dagster-plus/deployment/management/environment-variables) for more information. 
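To make that pointer concrete, one common pattern — shown here only as an illustrative sketch with a hypothetical variable and resource name — is to read a secret that has been added as a Dagster+ environment variable through `EnvVar` when configuring a resource:

```python
from dagster import ConfigurableResource, Definitions, EnvVar, asset


class MyApiResource(ConfigurableResource):
    """Hypothetical resource that needs an API token to call an external service."""

    api_token: str


@asset
def synced_records(my_api: MyApiResource) -> None:
    # Use my_api.api_token to authenticate against the external service here.
    ...


defs = Definitions(
    assets=[synced_records],
    resources={
        # MY_API_TOKEN is resolved from the environment at runtime, so the
        # secret value never needs to appear in code.
        "my_api": MyApiResource(api_token=EnvVar("MY_API_TOKEN")),
    },
)
```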
- ---- diff --git a/docs/docs-beta/docs/dagster-plus/deployment/management/settings/customizing-agent-settings.md b/docs/docs-beta/docs/dagster-plus/deployment/management/settings/customizing-agent-settings.md index df595ee48e153..e84fb830f287d 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/management/settings/customizing-agent-settings.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/management/settings/customizing-agent-settings.md @@ -4,4 +4,8 @@ sidebar_position: 80 unlisted: true --- -{/* TODO move from https://docs.dagster.io/dagster-plus/deployment/agents/customizing-configuration */} \ No newline at end of file +{/* TODO move from https://docs.dagster.io/dagster-plus/deployment/agents/customizing-configuration */} + +## Disabling compute logs + +{/* NOTE this is a placeholder section so the Hybrid deployment index page has somewhere to link to */} diff --git a/docs/docs-beta/docs/dagster-plus/features/authentication-and-access-control/rbac/user-roles-permissions.md b/docs/docs-beta/docs/dagster-plus/features/authentication-and-access-control/rbac/user-roles-permissions.md index 5b2ce29700e46..05b98b558c1e1 100644 --- a/docs/docs-beta/docs/dagster-plus/features/authentication-and-access-control/rbac/user-roles-permissions.md +++ b/docs/docs-beta/docs/dagster-plus/features/authentication-and-access-control/rbac/user-roles-permissions.md @@ -42,7 +42,7 @@ Dagster+ Pro users can create teams of users and assign default permission sets. With the exception of the **Organization Admin** role, user and team roles are set on a per-deployment basis. -Organization Admins have access to the entire organization, including all [deployments](/todo), [code locations](/dagster-plus/features/code-locations), and [Branch Deployments](dagster-plus/features/ci-cd/branch-deployments/index.md). +Organization Admins have access to the entire organization, including all [deployments](/todo), [code locations](/dagster-plus/deployment/code-locations), and [Branch Deployments](dagster-plus/features/ci-cd/branch-deployments/index.md). | Level | Plan | Description | | ------------------ | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | diff --git a/docs/docs-beta/docs/dagster-plus/getting-started.md b/docs/docs-beta/docs/dagster-plus/getting-started.md index 9be3ffb5f46d5..468e1a248869e 100644 --- a/docs/docs-beta/docs/dagster-plus/getting-started.md +++ b/docs/docs-beta/docs/dagster-plus/getting-started.md @@ -2,12 +2,16 @@ title: "Getting started with Dagster+" --- -First [create a Dagster+ organization](https://dagster.plus/signup). Note: you can sign up with: +To get started with Dagster+, you will need to create a Dagster+ organization and choose your deployment type (Serverless or Hybrid). + +## Create a Dagster+ organization + +First, [create a Dagster+ organization](https://dagster.plus/signup). You can sign up with: - a Google email address - a GitHub account -- a one-time email link, great if you are using a corporate email. You can setup SSO after completing these steps. 
+- a one-time email link (ideal if you are using a corporate email). You can set up SSO after completing these steps. -Next, pick your deployment type. Not sure? +## Choose your deployment type - [Dagster+ Serverless](/dagster-plus/deployment/deployment-types/serverless) is the easiest way to get started and is great for teams with limited DevOps support. In Dagster+ Serverless, your Dagster code is executed in Dagster+. You will need to be okay [giving Dagster+ the credentials](/dagster-plus/deployment/management/environment-variables) to connect to the tools you want to orchestrate. @@ -20,14 +24,12 @@ The remaining steps depend on your deployment type. We recommend following the steps in Dagster+ to add a new project. -![Screenshot of Dagster+ serverless NUX](/img/placeholder.svg) - -The Dagster+ on-boarding will guide you through: +The Dagster+ onboarding will guide you through: - creating a Git repository containing your Dagster code - setting up the necessary CI/CD actions to deploy that repository to Dagster+ :::tip -If you don't have any Dagster code yet, you will have the option to select an example quickstart project or import an existing dbt project +If you don't have any Dagster code yet, you can select an example project or import an existing dbt project. ::: See the guide on [adding code locations](/dagster-plus/features/code-locations) for details. @@ -35,12 +37,12 @@ See the guide on [adding code locations](/dagster-plus/features/code-locations) -## Install a Dagster+ Hybrid agent +**Install a Dagster+ Hybrid agent** -Follow [these guides](/dagster-plus/deployment/deployment-types/hybrid) for installing a Dagster+ Hybrid agent. Not sure which agent to pick? We recommend using the Dagster+ Kubernetes agent in most cases. +Follow [these guides](/dagster-plus/deployment/deployment-types/hybrid) for installing a Dagster+ Hybrid agent. If you're not sure which agent to use, we recommend the [Dagster+ Kubernetes agent](/dagster-plus/deployment/deployment-types/hybrid/kubernetes/index.md) in most cases. 
-## Setup CI/CD +**Set up CI/CD** In most cases, your CI/CD process will be responsible for: - building your Dagster code into a Docker image diff --git a/docs/docs-beta/docs/guides/build/integrate/ingesting-data.md b/docs/docs-beta/docs/guides/build/integrate/ingesting-data.md index a6290c8c37c1d..7d4d0467ff063 100644 --- a/docs/docs-beta/docs/guides/build/integrate/ingesting-data.md +++ b/docs/docs-beta/docs/guides/build/integrate/ingesting-data.md @@ -33,10 +33,10 @@ As a data orchestrator, Dagster helps with data ingestion as it can: Dagster currently integrates with the following data ingestion tools, enabling you to sync diverse data sources into data warehouse tables using pre-built connectors: -- [Airbyte](/integrations/airbyte) -- [dlt](/integrations/dlt) -- [Fivetran](/integrations/fivetran) -- [Sling](/integrations/sling) +- [Airbyte](/integrations/libraries/airbyte) +- [dlt](/integrations/libraries/dlt) +- [Fivetran](/integrations/libraries/fivetran) +- [Sling](/integrations/libraries/sling) ## Writing custom data ingestion pipelines diff --git a/docs/docs-beta/docs/guides/operate/index.md b/docs/docs-beta/docs/guides/operate/index.md deleted file mode 100644 index 6d95bde0b7fb9..0000000000000 --- a/docs/docs-beta/docs/guides/operate/index.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: Operate -sidebar_class_name: hidden ---- - -import DocCardList from '@theme/DocCardList'; - - \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/test/asset-checks.md b/docs/docs-beta/docs/guides/test/asset-checks.md index 9b1540dc2447b..b2feaead928be 100644 --- a/docs/docs-beta/docs/guides/test/asset-checks.md +++ b/docs/docs-beta/docs/guides/test/asset-checks.md @@ -33,7 +33,7 @@ To get started with asset checks, follow these general steps: ## Defining a single asset check \{#single-check} :::tip -Dagster's dbt integration can model existing dbt tests as asset checks. Refer to the [dagster-dbt documentation](/integrations/dbt) for more information. +Dagster's dbt integration can model existing dbt tests as asset checks. Refer to the [dagster-dbt documentation](/integrations/libraries/dbt) for more information. ::: A asset check is defined using the `@asset_check` decorator. diff --git a/docs/docs-beta/docs/integrations/fivetran.md b/docs/docs-beta/docs/integrations/fivetran.md deleted file mode 100644 index 9235e4e3788e5..0000000000000 --- a/docs/docs-beta/docs/integrations/fivetran.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -layout: Integration -status: published -name: Fivetran -title: Dagster & Fivetran -sidebar_label: Fivetran -excerpt: Orchestrate Fivetran connectors and schedule syncs with upstream or downstream dependencies. -date: 2022-11-07 -apireflink: https://docs.dagster.io/_apidocs/libraries/dagster-fivetran -docslink: https://docs.dagster.io/integrations/fivetran -partnerlink: https://www.fivetran.com/ -logo: /integrations/Fivetran.svg -categories: - - ETL -enabledBy: -enables: ---- - -### About this integration - -The Dagster-Fivetran integration enables you to orchestrate data ingestion as part of a larger pipeline. Programmatically interact with the Fivetran REST API to initiate syncs and monitor their progress. - -### Installation - -```bash -pip install dagster-fivetran -``` - -### Example - - - -### About Fivetran - -**Fivetran** ingests data from SaaS applications, databases, and servers. The data is stored and typically used for analytics. 
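As a minimal sketch of the `@asset_check` decorator mentioned in the asset checks guide above (the asset and check here are hypothetical, not taken from the original docs):

```python
from dagster import AssetCheckResult, Definitions, asset, asset_check


@asset
def orders() -> list[int]:
    return [1, 2, 3]


@asset_check(asset=orders)
def orders_not_empty(orders: list[int]) -> AssetCheckResult:
    # The check passes only if the materialized asset contains at least one row.
    return AssetCheckResult(passed=len(orders) > 0)


defs = Definitions(assets=[orders], asset_checks=[orders_not_empty])
```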
diff --git a/docs/docs-beta/docs/integrations/guides/multi-asset-integration.md b/docs/docs-beta/docs/integrations/guides/multi-asset-integration.md new file mode 100644 index 0000000000000..df0615e217fab --- /dev/null +++ b/docs/docs-beta/docs/integrations/guides/multi-asset-integration.md @@ -0,0 +1,5 @@ +--- +title: Creating a multi-asset integration +--- + +{/* TODO write this */} \ No newline at end of file diff --git a/docs/docs-beta/docs/integrations/index.md b/docs/docs-beta/docs/integrations/index.md deleted file mode 100644 index 233bd07d0fdc0..0000000000000 --- a/docs/docs-beta/docs/integrations/index.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -sidebar_class_name: hidden -title: Integrations ---- - -import DocCardList from '@theme/DocCardList'; - - diff --git a/docs/docs-beta/docs/integrations/airbyte.md b/docs/docs-beta/docs/integrations/libraries/airbyte.md similarity index 97% rename from docs/docs-beta/docs/integrations/airbyte.md rename to docs/docs-beta/docs/integrations/libraries/airbyte.md index eec45c87481fb..36e160f2986d9 100644 --- a/docs/docs-beta/docs/integrations/airbyte.md +++ b/docs/docs-beta/docs/integrations/libraries/airbyte.md @@ -14,9 +14,10 @@ categories: - ETL enabledBy: enables: +tags: [dagster-supported, etl] --- -### About this integration + Using this integration, you can trigger Airbyte syncs and orchestrate your Airbyte connections from within Dagster, making it easy to chain an Airbyte sync with upstream or downstream steps in your workflow. diff --git a/docs/docs-beta/docs/integrations/aws/athena.md b/docs/docs-beta/docs/integrations/libraries/aws/athena.md similarity index 97% rename from docs/docs-beta/docs/integrations/aws/athena.md rename to docs/docs-beta/docs/integrations/libraries/aws/athena.md index e17f95e077147..f28c6a5a8fae3 100644 --- a/docs/docs-beta/docs/integrations/aws/athena.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/athena.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + This integration allows you to connect to AWS Athena, a serverless interactive query service that makes it easy to analyze data in Amazon S3 using standard SQL. Using this integration, you can issue queries to Athena, fetch results, and handle query execution states within your Dagster pipelines. diff --git a/docs/docs-beta/docs/integrations/aws/cloudwatch.md b/docs/docs-beta/docs/integrations/libraries/aws/cloudwatch.md similarity index 98% rename from docs/docs-beta/docs/integrations/aws/cloudwatch.md rename to docs/docs-beta/docs/integrations/libraries/aws/cloudwatch.md index 6f31e5b7fbf02..8f27e767cd512 100644 --- a/docs/docs-beta/docs/integrations/aws/cloudwatch.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/cloudwatch.md @@ -14,9 +14,10 @@ categories: - Monitoring enabledBy: enables: +tags: [dagster-supported, monitoring] --- -### About this integration + This integration allows you to send Dagster logs to AWS CloudWatch, enabling centralized logging and monitoring of your Dagster jobs. By using AWS CloudWatch, you can take advantage of its powerful log management features, such as real-time log monitoring, log retention policies, and alerting capabilities. 
diff --git a/docs/docs-beta/docs/integrations/aws/ecr.md b/docs/docs-beta/docs/integrations/libraries/aws/ecr.md similarity index 98% rename from docs/docs-beta/docs/integrations/aws/ecr.md rename to docs/docs-beta/docs/integrations/libraries/aws/ecr.md index dfaec5dea91f8..a3b2487794a0b 100644 --- a/docs/docs-beta/docs/integrations/aws/ecr.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/ecr.md @@ -14,9 +14,10 @@ categories: - Other enabledBy: enables: +tags: [dagster-supported] --- -### About this integration + This integration allows you to connect to AWS Elastic Container Registry (ECR). It provides resources to interact with AWS ECR, enabling you to manage your container images. diff --git a/docs/docs-beta/docs/integrations/aws/emr.md b/docs/docs-beta/docs/integrations/libraries/aws/emr.md similarity index 98% rename from docs/docs-beta/docs/integrations/aws/emr.md rename to docs/docs-beta/docs/integrations/libraries/aws/emr.md index 4a055872d2a1f..db5ba4e4bc91d 100644 --- a/docs/docs-beta/docs/integrations/aws/emr.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/emr.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + The `dagster-aws` integration provides ways orchestrating data pipelines that leverage AWS services, including AWS EMR (Elastic MapReduce). This integration allows you to run and scale big data workloads using open source tools such as Apache Spark, Hive, Presto, and more. diff --git a/docs/docs-beta/docs/integrations/aws/glue.md b/docs/docs-beta/docs/integrations/libraries/aws/glue.md similarity index 97% rename from docs/docs-beta/docs/integrations/aws/glue.md rename to docs/docs-beta/docs/integrations/libraries/aws/glue.md index e06ce1494ba57..629df3adc218d 100644 --- a/docs/docs-beta/docs/integrations/aws/glue.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/glue.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + The `dagster-aws` integration library provides the `PipesGlueClient` resource, enabling you to launch AWS Glue jobs directly from Dagster assets and ops. This integration allows you to pass parameters to Glue code while Dagster receives real-time events, such as logs, asset checks, and asset materializations, from the initiated jobs. With minimal code changes required on the job side, this integration is both efficient and easy to implement. 
diff --git a/docs/docs-beta/docs/integrations/aws/index.md b/docs/docs-beta/docs/integrations/libraries/aws/index.md similarity index 80% rename from docs/docs-beta/docs/integrations/aws/index.md rename to docs/docs-beta/docs/integrations/libraries/aws/index.md index 481931c7b81e2..a95102a5e87c8 100644 --- a/docs/docs-beta/docs/integrations/aws/index.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/index.md @@ -4,4 +4,4 @@ title: AWS import DocCardList from '@theme/DocCardList'; - + \ No newline at end of file diff --git a/docs/docs-beta/docs/integrations/aws/lambda.md b/docs/docs-beta/docs/integrations/libraries/aws/lambda.md similarity index 97% rename from docs/docs-beta/docs/integrations/aws/lambda.md rename to docs/docs-beta/docs/integrations/libraries/aws/lambda.md index 4dd4ba58e903d..5ec9c7c9c64ad 100644 --- a/docs/docs-beta/docs/integrations/aws/lambda.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/lambda.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + Using this integration, you can leverage AWS Lambda to execute external code as part of your Dagster pipelines. This is particularly useful for running serverless functions that can scale automatically and handle various workloads without the need for managing infrastructure. The `PipesLambdaClient` class allows you to invoke AWS Lambda functions and stream logs and structured metadata back to Dagster's UI and tools. diff --git a/docs/docs-beta/docs/integrations/aws/redshift.md b/docs/docs-beta/docs/integrations/libraries/aws/redshift.md similarity index 97% rename from docs/docs-beta/docs/integrations/aws/redshift.md rename to docs/docs-beta/docs/integrations/libraries/aws/redshift.md index 053bd366b417f..593516db98136 100644 --- a/docs/docs-beta/docs/integrations/aws/redshift.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/redshift.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + Using this integration, you can connect to an AWS Redshift cluster and issue queries against it directly from your Dagster assets. This allows you to seamlessly integrate Redshift into your data pipelines, leveraging the power of Redshift's data warehousing capabilities within your Dagster workflows. diff --git a/docs/docs-beta/docs/integrations/aws/s3.md b/docs/docs-beta/docs/integrations/libraries/aws/s3.md similarity index 97% rename from docs/docs-beta/docs/integrations/aws/s3.md rename to docs/docs-beta/docs/integrations/libraries/aws/s3.md index e617605730442..71e35378e38b1 100644 --- a/docs/docs-beta/docs/integrations/aws/s3.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/s3.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + The AWS S3 integration allows data engineers to easily read, and write objects to the durable AWS S3 storage -- enabling engineers to a resilient storage layer when constructing their pipelines. 
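To illustrate the S3 description above with a short sketch (the bucket, key, and region are placeholders, not values from the original page), the `S3Resource` hands back a `boto3` client that you can use inside an asset:

```python
from dagster import Definitions, asset
from dagster_aws.s3 import S3Resource


@asset
def raw_report(s3: S3Resource) -> None:
    # Write a small object to S3 using the underlying boto3 client.
    s3.get_client().put_object(
        Bucket="my-example-bucket",
        Key="reports/raw_report.csv",
        Body=b"id,value\n1,42\n",
    )


defs = Definitions(
    assets=[raw_report],
    resources={"s3": S3Resource(region_name="us-east-1")},
)
```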
diff --git a/docs/docs-beta/docs/integrations/aws/secretsmanager.md b/docs/docs-beta/docs/integrations/libraries/aws/secretsmanager.md similarity index 98% rename from docs/docs-beta/docs/integrations/aws/secretsmanager.md rename to docs/docs-beta/docs/integrations/libraries/aws/secretsmanager.md index 736b84fc56fb4..48b3b007bf1cd 100644 --- a/docs/docs-beta/docs/integrations/aws/secretsmanager.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/secretsmanager.md @@ -14,9 +14,10 @@ categories: - Other enabledBy: enables: +tags: [dagster-supported] --- -### About this integration + This integration allows you to manage, retrieve, and rotate credentials, API keys, and other secrets using [AWS Secrets Manager](https://aws.amazon.com/secrets-manager/). diff --git a/docs/docs-beta/docs/integrations/aws/ssm.md b/docs/docs-beta/docs/integrations/libraries/aws/ssm.md similarity index 98% rename from docs/docs-beta/docs/integrations/aws/ssm.md rename to docs/docs-beta/docs/integrations/libraries/aws/ssm.md index 36f480a509482..d0da33d8d7e2f 100644 --- a/docs/docs-beta/docs/integrations/aws/ssm.md +++ b/docs/docs-beta/docs/integrations/libraries/aws/ssm.md @@ -14,9 +14,10 @@ categories: - Other enabledBy: enables: +tags: [dagster-supported] --- -### About this integration + The Dagster AWS Systems Manager (SSM) Parameter Store integration allows you to manage and retrieve parameters stored in AWS SSM Parameter Store directly within your Dagster pipelines. This integration provides resources to fetch parameters by name, tags, or paths, and optionally set them as environment variables for your operations. diff --git a/docs/docs-beta/docs/integrations/azure-adls2.md b/docs/docs-beta/docs/integrations/libraries/azure-adls2.md similarity index 97% rename from docs/docs-beta/docs/integrations/azure-adls2.md rename to docs/docs-beta/docs/integrations/libraries/azure-adls2.md index 9f766bac09d09..780b800aa62dc 100644 --- a/docs/docs-beta/docs/integrations/azure-adls2.md +++ b/docs/docs-beta/docs/integrations/libraries/azure-adls2.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + Dagster helps you use Azure Storage Accounts as part of your data pipeline. Azure Data Lake Storage Gen 2 (ADLS2) is our primary focus but we also provide utilities for Azure Blob Storage. diff --git a/docs/docs-beta/docs/integrations/census.md b/docs/docs-beta/docs/integrations/libraries/census.md similarity index 96% rename from docs/docs-beta/docs/integrations/census.md rename to docs/docs-beta/docs/integrations/libraries/census.md index ae13177111531..1c3e4f8e2c903 100644 --- a/docs/docs-beta/docs/integrations/census.md +++ b/docs/docs-beta/docs/integrations/libraries/census.md @@ -14,9 +14,10 @@ categories: - ETL enabledBy: enables: +tags: [community-supported, etl] --- -### About this integration + With the `dagster-census` integration you can execute a Census sync and poll until that sync completes, raising an error if it's unsuccessful. 
diff --git a/docs/docs-beta/docs/integrations/cube.md b/docs/docs-beta/docs/integrations/libraries/cube.md similarity index 97% rename from docs/docs-beta/docs/integrations/cube.md rename to docs/docs-beta/docs/integrations/libraries/cube.md index 9bac9cd168c7b..e1e976090adf0 100644 --- a/docs/docs-beta/docs/integrations/cube.md +++ b/docs/docs-beta/docs/integrations/libraries/cube.md @@ -14,9 +14,10 @@ categories: - Other enabledBy: enables: +tags: [community-supported] --- -### About this integration + With the `dagster_cube` integration you can setup Cube and Dagster to work together so that Dagster can push changes from upstream data sources to Cube using its integration API. diff --git a/docs/docs-beta/docs/integrations/databricks.md b/docs/docs-beta/docs/integrations/libraries/databricks.md similarity index 98% rename from docs/docs-beta/docs/integrations/databricks.md rename to docs/docs-beta/docs/integrations/libraries/databricks.md index 92ee6a0fc6a78..ff4736c3c0c4e 100644 --- a/docs/docs-beta/docs/integrations/databricks.md +++ b/docs/docs-beta/docs/integrations/libraries/databricks.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + The `dagster-databricks` integration library provides the `PipesDatabricksClient` resource, enabling you to launch Databricks jobs directly from Dagster assets and ops. This integration allows you to pass parameters to Databricks code while Dagster receives real-time events, such as logs, asset checks, and asset materializations, from the initiated jobs. With minimal code changes required on the job side, this integration is both efficient and easy to implement. diff --git a/docs/docs-beta/docs/integrations/datadog.md b/docs/docs-beta/docs/integrations/libraries/datadog.md similarity index 96% rename from docs/docs-beta/docs/integrations/datadog.md rename to docs/docs-beta/docs/integrations/libraries/datadog.md index 9bac8f21649b5..4f2e867eb2fd7 100644 --- a/docs/docs-beta/docs/integrations/datadog.md +++ b/docs/docs-beta/docs/integrations/libraries/datadog.md @@ -14,9 +14,10 @@ categories: - Monitoring enabledBy: enables: +tags: [dagster-supported, monitoring] --- -### About this integration + While Dagster provides comprehensive monitoring and observability of the pipelines it orchestrates, many teams look to centralize all their monitoring across apps, processes and infrastructure using Datadog's 'Cloud Monitoring as a Service'. The `dagster-datadog` integration allows you to publish metrics to Datadog from within Dagster ops. diff --git a/docs/docs-beta/docs/integrations/dbt-cloud.md b/docs/docs-beta/docs/integrations/libraries/dbt-cloud.md similarity index 97% rename from docs/docs-beta/docs/integrations/dbt-cloud.md rename to docs/docs-beta/docs/integrations/libraries/dbt-cloud.md index ae375f429e15e..80c07e9990d41 100644 --- a/docs/docs-beta/docs/integrations/dbt-cloud.md +++ b/docs/docs-beta/docs/integrations/libraries/dbt-cloud.md @@ -14,9 +14,10 @@ categories: - ETL enabledBy: enables: +tags: [dagster-supported, etl] --- -### About this integration + Dagster allows you to run dbt Cloud jobs alongside other technologies. You can schedule them to run as a step in a larger pipeline and manage them as a data asset. 
diff --git a/docs/docs-beta/docs/integrations/dbt.md b/docs/docs-beta/docs/integrations/libraries/dbt.md similarity index 98% rename from docs/docs-beta/docs/integrations/dbt.md rename to docs/docs-beta/docs/integrations/libraries/dbt.md index cd2b7f873e3f2..177dc9f4e45f3 100644 --- a/docs/docs-beta/docs/integrations/dbt.md +++ b/docs/docs-beta/docs/integrations/libraries/dbt.md @@ -14,9 +14,10 @@ categories: - ETL enabledBy: enables: +tags: [dagster-supported, etl] --- -### About this integration + Dagster orchestrates dbt alongside other technologies, so you can schedule dbt with Spark, Python, etc. in a single data pipeline. diff --git a/docs/docs-beta/docs/integrations/deltalake.md b/docs/docs-beta/docs/integrations/libraries/deltalake.md similarity index 97% rename from docs/docs-beta/docs/integrations/deltalake.md rename to docs/docs-beta/docs/integrations/libraries/deltalake.md index 175fc173c5534..3a3fc20373fef 100644 --- a/docs/docs-beta/docs/integrations/deltalake.md +++ b/docs/docs-beta/docs/integrations/libraries/deltalake.md @@ -15,9 +15,10 @@ categories: - Storage enabledBy: enables: +tags: [community-supported, storage] --- -### About this integration + Delta Lake is a great storage format for Dagster workflows. With this integration, you can use the Delta Lake I/O Manager to read and write your Dagster assets. diff --git a/docs/docs-beta/docs/integrations/dlt.md b/docs/docs-beta/docs/integrations/libraries/dlt.md similarity index 97% rename from docs/docs-beta/docs/integrations/dlt.md rename to docs/docs-beta/docs/integrations/libraries/dlt.md index 9381022348790..19c54360eea56 100644 --- a/docs/docs-beta/docs/integrations/dlt.md +++ b/docs/docs-beta/docs/integrations/libraries/dlt.md @@ -14,9 +14,10 @@ categories: - ETL enabledBy: enables: +tags: [dagster-supported, etl] --- -### About this integration + This integration allows you to use [dlt](https://dlthub.com/) to easily ingest and replicate data between systems through Dagster. diff --git a/docs/docs-beta/docs/integrations/docker.md b/docs/docs-beta/docs/integrations/libraries/docker.md similarity index 97% rename from docs/docs-beta/docs/integrations/docker.md rename to docs/docs-beta/docs/integrations/libraries/docker.md index 16bdd0ab960c0..6a65e2818cee9 100644 --- a/docs/docs-beta/docs/integrations/docker.md +++ b/docs/docs-beta/docs/integrations/libraries/docker.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + The `dagster-docker` integration library provides the `PipesDockerClient` resource, enabling you to launch Docker containers and execute external code directly from Dagster assets and ops. This integration allows you to pass parameters to Docker containers while Dagster receives real-time events, such as logs, asset checks, and asset materializations, from the initiated jobs. With minimal code changes required on the job side, this integration is both efficient and easy to implement. 
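A rough sketch of the `PipesDockerClient` pattern described above (the image and command are placeholders; a real container would typically use `dagster-pipes` to report results back to Dagster):

```python
from dagster import AssetExecutionContext, Definitions, asset
from dagster_docker import PipesDockerClient


@asset
def dockerized_asset(
    context: AssetExecutionContext, docker_pipes_client: PipesDockerClient
):
    # Launch a container from an existing image and stream Pipes events
    # (logs, asset checks, materializations) back to Dagster.
    return docker_pipes_client.run(
        context=context,
        image="python:3.11-slim",
        command=["python", "-c", "print('hello from the container')"],
    ).get_materialize_result()


defs = Definitions(
    assets=[dockerized_asset],
    resources={"docker_pipes_client": PipesDockerClient()},
)
```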
diff --git a/docs/docs-beta/docs/integrations/duckdb.md b/docs/docs-beta/docs/integrations/libraries/duckdb.md similarity index 96% rename from docs/docs-beta/docs/integrations/duckdb.md rename to docs/docs-beta/docs/integrations/libraries/duckdb.md index 5335df37db094..e8097b5040ed8 100644 --- a/docs/docs-beta/docs/integrations/duckdb.md +++ b/docs/docs-beta/docs/integrations/libraries/duckdb.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + This library provides an integration with the DuckDB database, and allows for an out-of-the-box [I/O Manager](https://docs.dagster.io/concepts/io-management/io-managers) so that you can make DuckDB your storage of choice. diff --git a/docs/docs-beta/docs/integrations/libraries/fivetran.md b/docs/docs-beta/docs/integrations/libraries/fivetran.md new file mode 100644 index 0000000000000..33baaa8275c07 --- /dev/null +++ b/docs/docs-beta/docs/integrations/libraries/fivetran.md @@ -0,0 +1,87 @@ +--- +layout: Integration +status: published +name: Fivetran +title: Using Dagster with Fivetran +sidebar_label: Fivetran +excerpt: Orchestrate Fivetran connectors syncs with upstream or downstream dependencies. +date: 2022-11-07 +apireflink: https://docs.dagster.io/_apidocs/libraries/dagster-fivetran +docslink: https://docs.dagster.io/integrations/fivetran +partnerlink: https://www.fivetran.com/ +logo: /integrations/Fivetran.svg +categories: + - ETL +enabledBy: +enables: +tags: [dagster-supported, etl] +--- + +This guide provides instructions for using Dagster with Fivetran using the `dagster-fivetran` library. Your Fivetran connector tables can be represented as assets in the Dagster asset graph, allowing you to track lineage and dependencies between Fivetran assets and data assets you are already modeling in Dagster. You can also use Dagster to orchestrate Fivetran connectors, allowing you to trigger syncs for these on a cadence or based on upstream data changes. + +## What you'll learn + +- How to represent Fivetran assets in the Dagster asset graph, including lineage to other Dagster assets. +- How to customize asset definition metadata for these Fivetran assets. +- How to materialize Fivetran connector tables from Dagster. +- How to customize how Fivetran connector tables are materialized. + +
+ Prerequisites + +- The `dagster` and `dagster-fivetran` libraries installed in your environment +- Familiarity with asset definitions and the Dagster asset graph +- Familiarity with Dagster resources +- Familiarity with Fivetran concepts, like connectors and connector tables +- A Fivetran workspace +- A Fivetran API key and API secret. For more information, see [Getting Started](https://fivetran.com/docs/rest-api/getting-started) in the Fivetran REST API documentation. + +
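Before the step-by-step sections below, here is a minimal end-to-end sketch of the workflow this guide describes (credentials come from hypothetical environment variables; the API names follow the `dagster-fivetran` objects referenced in this guide):

```python
from dagster import Definitions, EnvVar
from dagster_fivetran import FivetranWorkspace, load_fivetran_asset_specs

# Resource that lets Dagster communicate with your Fivetran workspace.
fivetran_workspace = FivetranWorkspace(
    account_id=EnvVar("FIVETRAN_ACCOUNT_ID"),
    api_key=EnvVar("FIVETRAN_API_KEY"),
    api_secret=EnvVar("FIVETRAN_API_SECRET"),
)

# Load every connector table in the workspace as an asset spec and include
# the specs in a Definitions object. To make these assets materializable from
# Dagster, the build_fivetran_assets_definitions factory covered below can be
# used instead of raw specs.
fivetran_specs = load_fivetran_asset_specs(fivetran_workspace)

defs = Definitions(assets=fivetran_specs, resources={"fivetran": fivetran_workspace})
```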
+ +## Set up your environment + +To get started, you'll need to install the `dagster` and `dagster-fivetran` Python packages: + +```bash +pip install dagster dagster-fivetran +``` + +## Represent Fivetran assets in the asset graph + +To load Fivetran assets into the Dagster asset graph, you must first construct a `FivetranWorkspace` resource, which allows Dagster to communicate with your Fivetran workspace. You'll need to supply your account ID, API key and API secret. See [Getting Started](https://fivetran.com/docs/rest-api/getting-started) in the Fivetran REST API documentation for more information on how to create your API key and API secret. + +Dagster can automatically load all connector tables from your Fivetran workspace as asset specs. Call the `load_fivetran_asset_specs` function, which returns a list of asset specs representing your Fivetran assets. You can then include these asset specs in your `Definitions` object: + + + +### Sync and materialize Fivetran assets + +You can use Dagster to sync Fivetran connectors and materialize Fivetran connector tables. You can use the `build_fivetran_assets_definitions` factory to create asset definitions for all the connectors in your Fivetran workspace. + + + +### Customize the materialization of Fivetran assets + +If you want to customize the sync of your connectors, you can use the `fivetran_assets` decorator to do so. This allows you to execute custom code before and after the call to the Fivetran sync. + + + +### Customize asset definition metadata for Fivetran assets + +By default, Dagster will generate asset specs for each Fivetran asset and populate default metadata. You can further customize asset properties by passing an instance of a custom `DagsterFivetranTranslator` subclass to the `load_fivetran_asset_specs` function. + + + +Note that `super()` is called in each of the overridden methods to generate the default asset spec. It is best practice to generate the default asset spec before customizing it. + +You can pass an instance of the custom translator to the `fivetran_assets` decorator or the `build_fivetran_assets_definitions` factory. + +### Load Fivetran assets from multiple workspaces + +Definitions from multiple Fivetran workspaces can be combined by instantiating multiple `FivetranWorkspace` resources and merging their specs. This lets you view all your Fivetran assets in a single asset graph: + + + +### About Fivetran + +**Fivetran** ingests data from SaaS applications, databases, and servers. The data is stored and typically used for analytics. \ No newline at end of file diff --git a/docs/docs-beta/docs/integrations/gcp/bigquery.md b/docs/docs-beta/docs/integrations/libraries/gcp/bigquery.md similarity index 96% rename from docs/docs-beta/docs/integrations/gcp/bigquery.md rename to docs/docs-beta/docs/integrations/libraries/gcp/bigquery.md index 3decbd33ad606..986a7f4070de2 100644 --- a/docs/docs-beta/docs/integrations/gcp/bigquery.md +++ b/docs/docs-beta/docs/integrations/libraries/gcp/bigquery.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + The Google Cloud Platform BigQuery integration allows data engineers to easily query and store data in the BigQuery data warehouse through the use of the `BigQueryResource`.
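As a short, illustrative sketch of the `BigQueryResource` mentioned above (the project, dataset, and query are placeholders, not values from the original page):

```python
from dagster import Definitions, asset
from dagster_gcp import BigQueryResource


@asset
def small_summary_table(bigquery: BigQueryResource) -> None:
    # get_client() yields an authenticated google.cloud.bigquery.Client.
    with bigquery.get_client() as client:
        client.query(
            "CREATE OR REPLACE TABLE my_dataset.small_summary AS "
            "SELECT 1 AS id, 'example' AS label"
        ).result()


defs = Definitions(
    assets=[small_summary_table],
    resources={"bigquery": BigQueryResource(project="my-gcp-project")},
)
```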
diff --git a/docs/docs-beta/docs/integrations/gcp/dataproc.md b/docs/docs-beta/docs/integrations/libraries/gcp/dataproc.md similarity index 97% rename from docs/docs-beta/docs/integrations/gcp/dataproc.md rename to docs/docs-beta/docs/integrations/libraries/gcp/dataproc.md index fbb9527fb6065..dc168778f798b 100644 --- a/docs/docs-beta/docs/integrations/gcp/dataproc.md +++ b/docs/docs-beta/docs/integrations/libraries/gcp/dataproc.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + Using this integration, you can manage and interact with Google Cloud Platform's Dataproc service directly from Dagster. This integration allows you to create, manage, and delete Dataproc clusters, and submit and monitor jobs on these clusters. diff --git a/docs/docs-beta/docs/integrations/gcp/gcs.md b/docs/docs-beta/docs/integrations/libraries/gcp/gcs.md similarity index 96% rename from docs/docs-beta/docs/integrations/gcp/gcs.md rename to docs/docs-beta/docs/integrations/libraries/gcp/gcs.md index 4969db7e33882..5969fe8f89368 100644 --- a/docs/docs-beta/docs/integrations/gcp/gcs.md +++ b/docs/docs-beta/docs/integrations/libraries/gcp/gcs.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + This integration allows you to interact with Google Cloud Storage (GCS) using Dagster. It provides resources, I/O Managers, and utilities to manage and store data in GCS, making it easier to integrate GCS into your data pipelines. diff --git a/docs/docs-beta/docs/integrations/gcp/index.md b/docs/docs-beta/docs/integrations/libraries/gcp/index.md similarity index 100% rename from docs/docs-beta/docs/integrations/gcp/index.md rename to docs/docs-beta/docs/integrations/libraries/gcp/index.md diff --git a/docs/docs-beta/docs/integrations/github.md b/docs/docs-beta/docs/integrations/libraries/github.md similarity index 97% rename from docs/docs-beta/docs/integrations/github.md rename to docs/docs-beta/docs/integrations/libraries/github.md index 19c278d44ea0a..8d4176eb8e940 100644 --- a/docs/docs-beta/docs/integrations/github.md +++ b/docs/docs-beta/docs/integrations/libraries/github.md @@ -14,9 +14,10 @@ categories: - Other enabledBy: enables: +tags: [dagster-supported] --- -### About this integration + This library provides an integration with _[GitHub Apps](https://docs.github.com/en/developers/apps/getting-started-with-apps/about-apps)_ by providing a thin wrapper on the GitHub v4 GraphQL API. This allows for automating operations within your GitHub repositories and with the tighter permissions scopes that GitHub Apps allow for vs using a personal token. diff --git a/docs/docs-beta/docs/integrations/hashicorp.md b/docs/docs-beta/docs/integrations/libraries/hashicorp.md similarity index 97% rename from docs/docs-beta/docs/integrations/hashicorp.md rename to docs/docs-beta/docs/integrations/libraries/hashicorp.md index 5664934092637..5d65ae6106959 100644 --- a/docs/docs-beta/docs/integrations/hashicorp.md +++ b/docs/docs-beta/docs/integrations/libraries/hashicorp.md @@ -15,9 +15,10 @@ categories: - Other enabledBy: enables: +tags: [community-supported] --- -### About this integration + Package for integrating HashiCorp Vault into Dagster so that you can securely manage tokens and passwords. 
diff --git a/docs/docs-beta/docs/integrations/hightouch.md b/docs/docs-beta/docs/integrations/libraries/hightouch.md similarity index 97% rename from docs/docs-beta/docs/integrations/hightouch.md rename to docs/docs-beta/docs/integrations/libraries/hightouch.md index 11f81649565c9..eea47836e2390 100644 --- a/docs/docs-beta/docs/integrations/hightouch.md +++ b/docs/docs-beta/docs/integrations/libraries/hightouch.md @@ -14,9 +14,10 @@ categories: - ETL enabledBy: enables: +tags: [community-supported, etl] --- -### About this integration + With this integration you can trigger Hightouch syncs and monitor them from within Dagster. Fine-tune when Hightouch syncs kick-off, visualize their dependencies, and monitor the steps in your data activation workflow. diff --git a/docs/docs-beta/docs/integrations/libraries/index.md b/docs/docs-beta/docs/integrations/libraries/index.md new file mode 100644 index 0000000000000..99a53949d12af --- /dev/null +++ b/docs/docs-beta/docs/integrations/libraries/index.md @@ -0,0 +1,10 @@ +--- +title: Libraries +sidebar_class_name: hidden +--- + +You can integrate Dagster with external services using our libraries and libraries supported by the community. + +import DocCardList from '@theme/DocCardList'; + + \ No newline at end of file diff --git a/docs/docs-beta/docs/integrations/jupyter.md b/docs/docs-beta/docs/integrations/libraries/jupyter.md similarity index 96% rename from docs/docs-beta/docs/integrations/jupyter.md rename to docs/docs-beta/docs/integrations/libraries/jupyter.md index f0ab1db8998cf..c24ab32e1f1d5 100644 --- a/docs/docs-beta/docs/integrations/jupyter.md +++ b/docs/docs-beta/docs/integrations/libraries/jupyter.md @@ -15,6 +15,7 @@ enabledBy: categories: - Compute enables: +tags: [dagster-supported, compute] --- ### About Jupyter diff --git a/docs/docs-beta/docs/integrations/kubernetes.md b/docs/docs-beta/docs/integrations/libraries/kubernetes.md similarity index 98% rename from docs/docs-beta/docs/integrations/kubernetes.md rename to docs/docs-beta/docs/integrations/libraries/kubernetes.md index dbe389b9b2536..bdff728e10bbf 100644 --- a/docs/docs-beta/docs/integrations/kubernetes.md +++ b/docs/docs-beta/docs/integrations/libraries/kubernetes.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + The `dagster-k8s` integration library provides the `PipesK8sClient` resource, enabling you to launch Kubernetes pods and execute external code directly from Dagster assets and ops. This integration allows you to pass parameters to Kubernetes pods while Dagster receives real-time events, such as logs, asset checks, and asset materializations, from the initiated jobs. With minimal code changes required on the job side, this integration is both efficient and easy to implement. diff --git a/docs/docs-beta/docs/integrations/lakefs.md b/docs/docs-beta/docs/integrations/libraries/lakefs.md similarity index 97% rename from docs/docs-beta/docs/integrations/lakefs.md rename to docs/docs-beta/docs/integrations/libraries/lakefs.md index c4901e7bc28f6..64f3510405c21 100644 --- a/docs/docs-beta/docs/integrations/lakefs.md +++ b/docs/docs-beta/docs/integrations/libraries/lakefs.md @@ -15,9 +15,10 @@ categories: - Storage enabledBy: enables: +tags: [community-supported, storage] --- -### About this integration + By integrating with lakeFS, a big data scale version control system, you can leverage the versioning capabilities of lakeFS to track changes to your data. 
This integration allows you to have a complete lineage of your data, from the initial raw data to the transformed and processed data, making it easier to understand and reproduce data transformations. diff --git a/docs/docs-beta/docs/integrations/looker.md b/docs/docs-beta/docs/integrations/libraries/looker.md similarity index 97% rename from docs/docs-beta/docs/integrations/looker.md rename to docs/docs-beta/docs/integrations/libraries/looker.md index 33b936b606125..4cff5bf5fddcd 100644 --- a/docs/docs-beta/docs/integrations/looker.md +++ b/docs/docs-beta/docs/integrations/libraries/looker.md @@ -15,9 +15,10 @@ categories: - BI enabledBy: enables: +tags: [dagster-supported, bi] --- -### About this integration + Dagster allows you to represent your Looker project as assets, alongside other your other technologies like dbt and Sling. This allows you to see how your Looker assets are connected to your other data assets, and how changes to other data assets might impact your Looker project. diff --git a/docs/docs-beta/docs/integrations/meltano.md b/docs/docs-beta/docs/integrations/libraries/meltano.md similarity index 97% rename from docs/docs-beta/docs/integrations/meltano.md rename to docs/docs-beta/docs/integrations/libraries/meltano.md index 3a36625b75b4c..c78e518e8ca24 100644 --- a/docs/docs-beta/docs/integrations/meltano.md +++ b/docs/docs-beta/docs/integrations/libraries/meltano.md @@ -15,9 +15,10 @@ categories: communityIntegration: true enabledBy: enables: +tags: [community-supported, etl] --- -### About this integration + The `dagster-meltano` library allows you to run Meltano using Dagster. Design and configure ingestion jobs using the popular [Singer.io](https://singer.io) specification. diff --git a/docs/docs-beta/docs/integrations/microsoft-teams.md b/docs/docs-beta/docs/integrations/libraries/microsoft-teams.md similarity index 96% rename from docs/docs-beta/docs/integrations/microsoft-teams.md rename to docs/docs-beta/docs/integrations/libraries/microsoft-teams.md index bed3741fdcda1..183b9f3d54cab 100644 --- a/docs/docs-beta/docs/integrations/microsoft-teams.md +++ b/docs/docs-beta/docs/integrations/libraries/microsoft-teams.md @@ -14,10 +14,9 @@ categories: - Alerting enabledBy: enables: +tags: [dagster-supported, alerting] --- -### About this integration - By configuring this resource, you can post messages to MS Teams from any Dagster op or asset. ### Installation diff --git a/docs/docs-beta/docs/integrations/open-metadata.md b/docs/docs-beta/docs/integrations/libraries/open-metadata.md similarity index 97% rename from docs/docs-beta/docs/integrations/open-metadata.md rename to docs/docs-beta/docs/integrations/libraries/open-metadata.md index 476eaaa032b48..65c6c5077fd86 100644 --- a/docs/docs-beta/docs/integrations/open-metadata.md +++ b/docs/docs-beta/docs/integrations/libraries/open-metadata.md @@ -15,9 +15,10 @@ categories: - Metadata enabledBy: enables: +tags: [community-supported, metadata] --- -### About this integration + With this integration you can create a Open Metadata service to ingest metadata produced by the Dagster application. View the Ingestion Pipeline running from the Open Metadata Service Page. 
diff --git a/docs/docs-beta/docs/integrations/openai.md b/docs/docs-beta/docs/integrations/libraries/openai.md similarity index 97% rename from docs/docs-beta/docs/integrations/openai.md rename to docs/docs-beta/docs/integrations/libraries/openai.md index 16d3f84270a96..532e21a9018eb 100644 --- a/docs/docs-beta/docs/integrations/openai.md +++ b/docs/docs-beta/docs/integrations/libraries/openai.md @@ -14,9 +14,10 @@ categories: - Other enabledBy: enables: +tags: [dagster-supported] --- -### About this integration + The `dagster-openai` library allows you to easily interact with the OpenAI REST API using the OpenAI Python API to build AI steps into your Dagster pipelines. You can also log OpenAI API usage metadata in Dagster Insights, giving you detailed observability on API call credit consumption. diff --git a/docs/docs-beta/docs/integrations/pagerduty.md b/docs/docs-beta/docs/integrations/libraries/pagerduty.md similarity index 95% rename from docs/docs-beta/docs/integrations/pagerduty.md rename to docs/docs-beta/docs/integrations/libraries/pagerduty.md index b2ac9dbcb111c..4c3b577b03fcc 100644 --- a/docs/docs-beta/docs/integrations/pagerduty.md +++ b/docs/docs-beta/docs/integrations/libraries/pagerduty.md @@ -14,9 +14,10 @@ categories: - Alerting enabledBy: enables: +tags: [dagster-supported, alerting] --- -### About this integration + This library provides an integration between Dagster and PagerDuty to support creating alerts from your Dagster code. diff --git a/docs/docs-beta/docs/integrations/pandas.md b/docs/docs-beta/docs/integrations/libraries/pandas.md similarity index 96% rename from docs/docs-beta/docs/integrations/pandas.md rename to docs/docs-beta/docs/integrations/libraries/pandas.md index b051eb7a98023..6f7ea38111aac 100644 --- a/docs/docs-beta/docs/integrations/pandas.md +++ b/docs/docs-beta/docs/integrations/libraries/pandas.md @@ -14,9 +14,10 @@ categories: - Metadata enabledBy: enables: +tags: [dagster-supported, metadata] --- -### About this integration + Perform data validation, emit summary statistics, and enable reliable DataFrame serialization/deserialization. The dagster_pandas library provides you with the utilities for implementing validation on Pandas DataFrames. The Dagster type system generates documentation of your DataFrame constraints and makes it accessible in the Dagster UI. diff --git a/docs/docs-beta/docs/integrations/pandera.md b/docs/docs-beta/docs/integrations/libraries/pandera.md similarity index 96% rename from docs/docs-beta/docs/integrations/pandera.md rename to docs/docs-beta/docs/integrations/libraries/pandera.md index 6957b91cbe3f7..1549013311feb 100644 --- a/docs/docs-beta/docs/integrations/pandera.md +++ b/docs/docs-beta/docs/integrations/libraries/pandera.md @@ -14,9 +14,10 @@ categories: - Metadata enabledBy: enables: +tags: [dagster-supported, metadata] --- -### About this integration + The `dagster-pandera` integration library provides an API for generating Dagster Types from [Pandera DataFrame schemas](https://pandera.readthedocs.io/en/stable/dataframe_schemas.html). 
diff --git a/docs/docs-beta/docs/integrations/prometheus.md b/docs/docs-beta/docs/integrations/libraries/prometheus.md similarity index 96% rename from docs/docs-beta/docs/integrations/prometheus.md rename to docs/docs-beta/docs/integrations/libraries/prometheus.md index a25da65aaaeaa..e90e0fe7fb10b 100644 --- a/docs/docs-beta/docs/integrations/prometheus.md +++ b/docs/docs-beta/docs/integrations/libraries/prometheus.md @@ -14,9 +14,10 @@ categories: - Monitoring enabledBy: enables: +tags: [dagster-supported, monitoring] --- -### About this integration + This integration allows you to push metrics to the Prometheus gateway from within a Dagster pipeline. diff --git a/docs/docs-beta/docs/integrations/sdf.md b/docs/docs-beta/docs/integrations/libraries/sdf.md similarity index 97% rename from docs/docs-beta/docs/integrations/sdf.md rename to docs/docs-beta/docs/integrations/libraries/sdf.md index ce80dd510ce77..a155f37bfa613 100644 --- a/docs/docs-beta/docs/integrations/sdf.md +++ b/docs/docs-beta/docs/integrations/libraries/sdf.md @@ -15,9 +15,10 @@ categories: - ETL enabledBy: enables: +tags: [community-supported, etl] --- -### About this integration + SDF can integrate seamlessly with your existing Dagster projects, providing the best-in-class transformation layer while enabling you to schedule, orchestrate, and monitor your dags in Dagster. diff --git a/docs/docs-beta/docs/integrations/secoda.md b/docs/docs-beta/docs/integrations/libraries/secoda.md similarity index 96% rename from docs/docs-beta/docs/integrations/secoda.md rename to docs/docs-beta/docs/integrations/libraries/secoda.md index 58c4c738eff32..3ec41ce364776 100644 --- a/docs/docs-beta/docs/integrations/secoda.md +++ b/docs/docs-beta/docs/integrations/libraries/secoda.md @@ -15,9 +15,10 @@ categories: - Metadata enabledBy: enables: +tags: [community-supported, metadata] --- -### About this integration + Connect Dagster to Secoda and see metadata related to your Dagster assets, asset groups and jobs right in Secoda. Simplify your team's access, and remove the need to switch between tools. diff --git a/docs/docs-beta/docs/integrations/shell.md b/docs/docs-beta/docs/integrations/libraries/shell.md similarity index 97% rename from docs/docs-beta/docs/integrations/shell.md rename to docs/docs-beta/docs/integrations/libraries/shell.md index 0c5653a4f47c6..6b69731c6aeea 100644 --- a/docs/docs-beta/docs/integrations/shell.md +++ b/docs/docs-beta/docs/integrations/libraries/shell.md @@ -14,9 +14,10 @@ categories: - Compute enabledBy: enables: +tags: [dagster-supported, compute] --- -### About this integration + Dagster comes with a native `PipesSubprocessClient` resource that enables you to launch shell commands directly from Dagster assets and ops. This integration allows you to pass parameters to external shell scripts while Dagster receives real-time events, such as logs, asset checks, and asset materializations, from the initiated external execution. With minimal code changes required on the job side, this integration is both efficient and easy to implement. 
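A minimal sketch of the `PipesSubprocessClient` pattern described above (the script path is a placeholder; an external script would typically use `dagster-pipes` to report results back to Dagster):

```python
from dagster import AssetExecutionContext, Definitions, PipesSubprocessClient, asset


@asset
def shell_asset(
    context: AssetExecutionContext, pipes_subprocess_client: PipesSubprocessClient
):
    # Run a shell command as a subprocess; stdout/stderr are streamed back to
    # Dagster, and the external process can report events via dagster-pipes.
    return pipes_subprocess_client.run(
        context=context,
        command=["bash", "scripts/run_export.sh"],  # placeholder script
    ).get_materialize_result()


defs = Definitions(
    assets=[shell_asset],
    resources={"pipes_subprocess_client": PipesSubprocessClient()},
)
```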
diff --git a/docs/docs-beta/docs/integrations/slack.md b/docs/docs-beta/docs/integrations/libraries/slack.md similarity index 95% rename from docs/docs-beta/docs/integrations/slack.md rename to docs/docs-beta/docs/integrations/libraries/slack.md index ead87dc85b4b2..3c5d3b0253ec9 100644 --- a/docs/docs-beta/docs/integrations/slack.md +++ b/docs/docs-beta/docs/integrations/libraries/slack.md @@ -14,9 +14,10 @@ categories: - Alerting enabledBy: enables: +tags: [dagster-supported, alerting] --- -### About this integration + This library provides an integration with Slack to support posting messages in your company's Slack workspace. diff --git a/docs/docs-beta/docs/integrations/sling.md b/docs/docs-beta/docs/integrations/libraries/sling.md similarity index 96% rename from docs/docs-beta/docs/integrations/sling.md rename to docs/docs-beta/docs/integrations/libraries/sling.md index 0c00aea6bb7e7..58cceb2bac261 100644 --- a/docs/docs-beta/docs/integrations/sling.md +++ b/docs/docs-beta/docs/integrations/libraries/sling.md @@ -14,9 +14,10 @@ categories: - ETL enabledBy: enables: +tags: [dagster-supported, etl] --- -### About this integration + This integration allows you to use [Sling](https://slingdata.io/) to extract and load data from popular data sources to destinations with high performance and ease. diff --git a/docs/docs-beta/docs/integrations/snowflake.md b/docs/docs-beta/docs/integrations/libraries/snowflake.md similarity index 96% rename from docs/docs-beta/docs/integrations/snowflake.md rename to docs/docs-beta/docs/integrations/libraries/snowflake.md index 1f8adf390a4be..3e3f45986ff76 100644 --- a/docs/docs-beta/docs/integrations/snowflake.md +++ b/docs/docs-beta/docs/integrations/libraries/snowflake.md @@ -14,9 +14,10 @@ categories: - Storage enabledBy: enables: +tags: [dagster-supported, storage] --- -### About this integration + This library provides an integration with the Snowflake data warehouse. Connect to Snowflake as a resource, then use the integration-provided functions to construct an op to establish connections and execute Snowflake queries. Read and write natively to Snowflake from Dagster assets. diff --git a/docs/docs-beta/docs/integrations/spark.md b/docs/docs-beta/docs/integrations/libraries/spark.md similarity index 96% rename from docs/docs-beta/docs/integrations/spark.md rename to docs/docs-beta/docs/integrations/libraries/spark.md index a8e1b693e82b3..e14f63f73a10a 100644 --- a/docs/docs-beta/docs/integrations/spark.md +++ b/docs/docs-beta/docs/integrations/libraries/spark.md @@ -15,9 +15,10 @@ categories: enabledBy: - dagster-pyspark enables: +tags: [dagster-supported, compute] --- -### About this integration + Spark jobs typically execute on infrastructure that's specialized for Spark. Spark applications are typically not containerized or executed on Kubernetes. diff --git a/docs/docs-beta/docs/integrations/ssh-sftp.md b/docs/docs-beta/docs/integrations/libraries/ssh-sftp.md similarity index 97% rename from docs/docs-beta/docs/integrations/ssh-sftp.md rename to docs/docs-beta/docs/integrations/libraries/ssh-sftp.md index bd8d15ed34626..be2314fa07cbb 100644 --- a/docs/docs-beta/docs/integrations/ssh-sftp.md +++ b/docs/docs-beta/docs/integrations/libraries/ssh-sftp.md @@ -14,9 +14,10 @@ categories: - Other enabledBy: enables: +tags: [dagster-supported] --- -### About this integration + This integration provides a resource for SSH remote execution using [Paramiko](https://github.com/paramiko/paramiko). 
It allows you to establish secure connections to networked resources and execute commands remotely. The integration also provides an SFTP client for secure file transfers between the local and remote systems. diff --git a/docs/docs-beta/docs/integrations/twilio.md b/docs/docs-beta/docs/integrations/libraries/twilio.md similarity index 95% rename from docs/docs-beta/docs/integrations/twilio.md rename to docs/docs-beta/docs/integrations/libraries/twilio.md index 2b2cb4d56f3af..4584fdb550781 100644 --- a/docs/docs-beta/docs/integrations/twilio.md +++ b/docs/docs-beta/docs/integrations/libraries/twilio.md @@ -14,9 +14,10 @@ categories: - Alerting enabledBy: enables: +tags: [dagster-supported, alerting] --- -### About this integration + Use your Twilio `Account SID` and `Auth Token` to build Twilio tasks right into your Dagster pipeline. diff --git a/docs/docs-beta/docs/integrations/wandb.md b/docs/docs-beta/docs/integrations/libraries/wandb.md similarity index 98% rename from docs/docs-beta/docs/integrations/wandb.md rename to docs/docs-beta/docs/integrations/libraries/wandb.md index 44a7ad762600c..363950d2fa10e 100644 --- a/docs/docs-beta/docs/integrations/wandb.md +++ b/docs/docs-beta/docs/integrations/libraries/wandb.md @@ -15,9 +15,10 @@ categories: - Other enabledBy: enables: +tags: [community-supported] --- -### About this integration + Use Dagster and Weights & Biases (W&B) to orchestrate your MLOps pipelines and maintain ML assets. The integration with W&B makes it easy within Dagster to: diff --git a/docs/docs-beta/docs/tags.yml b/docs/docs-beta/docs/tags.yml new file mode 100644 index 0000000000000..c4ec13c2a2145 --- /dev/null +++ b/docs/docs-beta/docs/tags.yml @@ -0,0 +1,36 @@ +community-supported: + label: 'community-supported' + permalink: '/integrations/community-supported' + description: 'Community-supported integrations.' +dagster-supported: + label: 'dagster-supported' + permalink: '/integrations/dagster-supported' + description: 'Dagster-supported integrations.' +etl: + label: 'ETL' + permalink: '/integrations/etl' + description: 'ETL integrations.' +storage: + label: 'storage' + permalink: '/integrations/storage' + description: 'Storage integrations.' +compute: + label: 'compute' + permalink: '/integrations/compute' + description: 'Compute integrations.' +bi: + label: 'BI' + permalink: '/integrations/bi' + description: 'BI integrations.' +monitoring: + label: 'monitoring' + permalink: '/integrations/monitoring' + description: 'Monitoring integrations.' +alerting: + label: 'alerting' + permalink: '/integrations/alerting' + description: 'Alerting integrations.' +metadata: + label: 'metadata' + permalink: '/integrations/metadata' + description: 'Metadata integrations.' 
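To ground the Slack alerting description from the hunk above, the sketch below posts a message from an asset using the library's `SlackResource`; the channel name and token environment variable are assumptions for illustration only.

```python
import dagster as dg
from dagster_slack import SlackResource


@dg.asset
def notify_on_refresh(slack: SlackResource) -> None:
    # get_client() returns a slack_sdk WebClient authenticated with the configured token
    slack.get_client().chat_postMessage(
        channel="#data-alerts",  # assumed channel name
        text="Nightly refresh completed.",
    )


defs = dg.Definitions(
    assets=[notify_on_refresh],
    resources={"slack": SlackResource(token=dg.EnvVar("SLACK_BOT_TOKEN"))},
)
```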
diff --git a/docs/docs-beta/docusaurus.config.ts b/docs/docs-beta/docusaurus.config.ts index 491dc86f6ba46..7a3f8dd9f75af 100644 --- a/docs/docs-beta/docusaurus.config.ts +++ b/docs/docs-beta/docusaurus.config.ts @@ -1,6 +1,7 @@ import {themes as prismThemes} from 'prism-react-renderer'; import type {Config} from '@docusaurus/types'; import type * as Preset from '@docusaurus/preset-classic'; +import { groupCollapsed } from 'console'; const config: Config = { title: 'Dagster Docs - Beta', @@ -84,7 +85,7 @@ const config: Config = { { label: 'Integrations', type: 'doc', - docId: 'integrations/index', + docId: 'integrations/libraries/index', position: 'left', }, { diff --git a/docs/docs-beta/sidebars.ts b/docs/docs-beta/sidebars.ts index 129d82e4510bb..5bc6f64d74974 100644 --- a/docs/docs-beta/sidebars.ts +++ b/docs/docs-beta/sidebars.ts @@ -100,133 +100,24 @@ const sidebars: SidebarsConfig = { integrations: [ { type: 'category', - label: 'Categories', - collapsible: false, - items: [ - { - type: 'category', - label: 'ETL', - items: [ - 'integrations/airbyte', - 'integrations/sdf', - 'integrations/fivetran', - 'integrations/dlt', - 'integrations/census', - 'integrations/dbt', - 'integrations/dbt-cloud', - 'integrations/sling', - 'integrations/hightouch', - 'integrations/meltano', - ], - }, - { - type: 'category', - label: 'Storage', - items: [ - 'integrations/snowflake', - 'integrations/gcp/bigquery', - 'integrations/aws/athena', - 'integrations/aws/s3', - 'integrations/duckdb', - 'integrations/deltalake', - 'integrations/aws/redshift', - 'integrations/gcp/gcs', - 'integrations/azure-adls2', - 'integrations/lakefs', - ], - }, - { - type: 'category', - label: 'Compute', - items: [ - 'integrations/kubernetes', - 'integrations/spark', - 'integrations/aws/glue', - 'integrations/jupyter', - 'integrations/aws/emr', - 'integrations/databricks', - 'integrations/aws/lambda', - 'integrations/docker', - 'integrations/shell', - 'integrations/gcp/dataproc', - ], - }, - { - type: 'category', - label: 'BI', - items: ['integrations/looker'], - }, - { - type: 'category', - label: 'Monitoring', - items: ['integrations/prometheus', 'integrations/datadog', 'integrations/aws/cloudwatch'], - }, - { - type: 'category', - label: 'Alerting', - items: [ - 'integrations/slack', - 'integrations/twilio', - 'integrations/pagerduty', - 'integrations/microsoft-teams', - ], - }, - { - type: 'category', - label: 'Metadata', - items: [ - 'integrations/secoda', - 'integrations/pandera', - 'integrations/open-metadata', - 'integrations/pandas', - ], - }, - { - type: 'category', - label: 'Other', - items: [ - 'integrations/cube', - 'integrations/aws/secretsmanager', - 'integrations/openai', - 'integrations/ssh-sftp', - 'integrations/github', - 'integrations/aws/ssm', - 'integrations/aws/ecr', - 'integrations/wandb', - 'integrations/hashicorp', - ], - }, - ], - }, - { - type: 'category', - label: 'Community Supported', + label: 'Guides', + collapsed: false, items: [ - 'integrations/secoda', - 'integrations/cube', - 'integrations/sdf', - 'integrations/open-metadata', - 'integrations/census', - 'integrations/deltalake', - 'integrations/hightouch', - 'integrations/wandb', - 'integrations/meltano', - 'integrations/hashicorp', - 'integrations/lakefs', - ], + 'integrations/guides/multi-asset-integration' + ] }, { type: 'category', - label: 'All Integrations', - collapsed: true, - // link: {type: 'doc', id: 'integrations'}, + label: 'Libraries', + collapsible: false, + link: {type: 'doc', id: 'integrations/libraries/index'}, items: [ { 
type: 'autogenerated', - dirName: 'integrations', - }, - ], - }, + dirName: 'integrations/libraries' + } + ] + } ], dagsterPlus: [ 'dagster-plus/index', diff --git a/docs/docs-beta/static/images/dagster-cloud/deployment/hybrid-architecture.png b/docs/docs-beta/static/images/dagster-cloud/deployment/hybrid-architecture.png new file mode 100644 index 0000000000000..89ce1b90c0b59 Binary files /dev/null and b/docs/docs-beta/static/images/dagster-cloud/deployment/hybrid-architecture.png differ diff --git a/docs/next/.versioned_content/_versions_with_static_links.json b/docs/next/.versioned_content/_versions_with_static_links.json index 03a000fd36b18..5b12b5f2ea18d 100644 --- a/docs/next/.versioned_content/_versions_with_static_links.json +++ b/docs/next/.versioned_content/_versions_with_static_links.json @@ -606,5 +606,9 @@ { "url": "https://release-1-9-4.dagster.dagster-docs.io/", "version": "1.9.4" + }, + { + "url": "https://release-1-9-5.dagster.dagster-docs.io/", + "version": "1.9.5" } ] \ No newline at end of file diff --git a/docs/next/package.json b/docs/next/package.json index 46e996b0c0a16..5a8f3fc40d108 100644 --- a/docs/next/package.json +++ b/docs/next/package.json @@ -40,7 +40,7 @@ "lodash": "^4.17.21", "mdast-util-toc": "^5.1.0", "new-github-issue-url": "^0.2.1", - "next": "^14.2.10", + "next": "^14.2.15", "next-mdx-remote": "^2.1.4", "next-remote-watch": "^2.0.0", "next-seo": "^4.17.0", diff --git a/docs/next/public/images/concepts/metadata-tags/kinds/icons/tool-googledrive-color.svg b/docs/next/public/images/concepts/metadata-tags/kinds/icons/tool-googledrive-color.svg new file mode 100644 index 0000000000000..2d94beff46945 --- /dev/null +++ b/docs/next/public/images/concepts/metadata-tags/kinds/icons/tool-googledrive-color.svg @@ -0,0 +1,3 @@ + + + diff --git a/docs/next/public/objects.inv b/docs/next/public/objects.inv index fd50d97032019..dc486e70947de 100644 Binary files a/docs/next/public/objects.inv and b/docs/next/public/objects.inv differ diff --git a/docs/next/yarn.lock b/docs/next/yarn.lock index c1c47a3337653..b030a96106a71 100644 --- a/docs/next/yarn.lock +++ b/docs/next/yarn.lock @@ -2252,10 +2252,10 @@ __metadata: languageName: node linkType: hard -"@next/env@npm:14.2.12": - version: 14.2.12 - resolution: "@next/env@npm:14.2.12" - checksum: 10/9e1f36da7d794a29db42ebc68e24cc7ab19ab2d1fd86d6cdf872fac0f56cbce97d6df9ff43f526ec083c505feea716b86668c7fcc410d809ad136bb656a45d03 +"@next/env@npm:14.2.20": + version: 14.2.20 + resolution: "@next/env@npm:14.2.20" + checksum: 10/3aaf2ba16344d7cede12a846859fddffa172e951f2dc28bb66f8b7c24cb2c207d2a49c84fea965ae964714aeb2269cff7a91723b57631765f78fd02b9465d1f2 languageName: node linkType: hard @@ -2278,65 +2278,65 @@ __metadata: languageName: node linkType: hard -"@next/swc-darwin-arm64@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-darwin-arm64@npm:14.2.12" +"@next/swc-darwin-arm64@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-darwin-arm64@npm:14.2.20" conditions: os=darwin & cpu=arm64 languageName: node linkType: hard -"@next/swc-darwin-x64@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-darwin-x64@npm:14.2.12" +"@next/swc-darwin-x64@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-darwin-x64@npm:14.2.20" conditions: os=darwin & cpu=x64 languageName: node linkType: hard -"@next/swc-linux-arm64-gnu@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-arm64-gnu@npm:14.2.12" +"@next/swc-linux-arm64-gnu@npm:14.2.20": + version: 14.2.20 + resolution: 
"@next/swc-linux-arm64-gnu@npm:14.2.20" conditions: os=linux & cpu=arm64 & libc=glibc languageName: node linkType: hard -"@next/swc-linux-arm64-musl@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-arm64-musl@npm:14.2.12" +"@next/swc-linux-arm64-musl@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-linux-arm64-musl@npm:14.2.20" conditions: os=linux & cpu=arm64 & libc=musl languageName: node linkType: hard -"@next/swc-linux-x64-gnu@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-x64-gnu@npm:14.2.12" +"@next/swc-linux-x64-gnu@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-linux-x64-gnu@npm:14.2.20" conditions: os=linux & cpu=x64 & libc=glibc languageName: node linkType: hard -"@next/swc-linux-x64-musl@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-x64-musl@npm:14.2.12" +"@next/swc-linux-x64-musl@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-linux-x64-musl@npm:14.2.20" conditions: os=linux & cpu=x64 & libc=musl languageName: node linkType: hard -"@next/swc-win32-arm64-msvc@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-win32-arm64-msvc@npm:14.2.12" +"@next/swc-win32-arm64-msvc@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-win32-arm64-msvc@npm:14.2.20" conditions: os=win32 & cpu=arm64 languageName: node linkType: hard -"@next/swc-win32-ia32-msvc@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-win32-ia32-msvc@npm:14.2.12" +"@next/swc-win32-ia32-msvc@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-win32-ia32-msvc@npm:14.2.20" conditions: os=win32 & cpu=ia32 languageName: node linkType: hard -"@next/swc-win32-x64-msvc@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-win32-x64-msvc@npm:14.2.12" +"@next/swc-win32-x64-msvc@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-win32-x64-msvc@npm:14.2.20" conditions: os=win32 & cpu=x64 languageName: node linkType: hard @@ -4346,7 +4346,7 @@ __metadata: lodash: "npm:^4.17.21" mdast-util-toc: "npm:^5.1.0" new-github-issue-url: "npm:^0.2.1" - next: "npm:^14.2.10" + next: "npm:^14.2.15" next-mdx-remote: "npm:^2.1.4" next-remote-watch: "npm:^2.0.0" next-seo: "npm:^4.17.0" @@ -8456,20 +8456,20 @@ __metadata: languageName: node linkType: hard -"next@npm:^14.2.10": - version: 14.2.12 - resolution: "next@npm:14.2.12" +"next@npm:^14.2.15": + version: 14.2.20 + resolution: "next@npm:14.2.20" dependencies: - "@next/env": "npm:14.2.12" - "@next/swc-darwin-arm64": "npm:14.2.12" - "@next/swc-darwin-x64": "npm:14.2.12" - "@next/swc-linux-arm64-gnu": "npm:14.2.12" - "@next/swc-linux-arm64-musl": "npm:14.2.12" - "@next/swc-linux-x64-gnu": "npm:14.2.12" - "@next/swc-linux-x64-musl": "npm:14.2.12" - "@next/swc-win32-arm64-msvc": "npm:14.2.12" - "@next/swc-win32-ia32-msvc": "npm:14.2.12" - "@next/swc-win32-x64-msvc": "npm:14.2.12" + "@next/env": "npm:14.2.20" + "@next/swc-darwin-arm64": "npm:14.2.20" + "@next/swc-darwin-x64": "npm:14.2.20" + "@next/swc-linux-arm64-gnu": "npm:14.2.20" + "@next/swc-linux-arm64-musl": "npm:14.2.20" + "@next/swc-linux-x64-gnu": "npm:14.2.20" + "@next/swc-linux-x64-musl": "npm:14.2.20" + "@next/swc-win32-arm64-msvc": "npm:14.2.20" + "@next/swc-win32-ia32-msvc": "npm:14.2.20" + "@next/swc-win32-x64-msvc": "npm:14.2.20" "@swc/helpers": "npm:0.5.5" busboy: "npm:1.6.0" caniuse-lite: "npm:^1.0.30001579" @@ -8510,7 +8510,7 @@ __metadata: optional: true bin: next: dist/bin/next - checksum: 
10/4dcae15547930cdaeb8a1d935dec3ab0c82a65347b0835988fd70fa5b108f1c301b75f98acf063c253858719e2969301fb2b0c30d6b2a46086ec19419430b119 + checksum: 10/baddcaeffa82e321cda87ad727540fc8ad639af5439ccc69b349c2b9a4315244d55c4aeed391c7bcd79edd634d6410b9e4a718ca02cc9e910046960444bb0c64 languageName: node linkType: hard diff --git a/docs/sphinx/sections/api/apidocs/libraries/dagster-aws.rst b/docs/sphinx/sections/api/apidocs/libraries/dagster-aws.rst index e8774ae92ed5d..15d161197def5 100644 --- a/docs/sphinx/sections/api/apidocs/libraries/dagster-aws.rst +++ b/docs/sphinx/sections/api/apidocs/libraries/dagster-aws.rst @@ -49,6 +49,9 @@ ECS .. autoconfigurable:: dagster_aws.ecs.EcsRunLauncher :annotation: RunLauncher +.. autoconfigurable:: dagster_aws.ecs.ecs_executor + :annotation: ExecutorDefinition + Redshift -------- diff --git a/examples/assets_modern_data_stack/setup.py b/examples/assets_modern_data_stack/setup.py index 419a06850ae31..2b0aabbd2d2f1 100644 --- a/examples/assets_modern_data_stack/setup.py +++ b/examples/assets_modern_data_stack/setup.py @@ -23,11 +23,6 @@ "dagster-webserver", "pytest", ], - "test": [ - # cant build psycopg2 in buildkite - # something about the 1.8.0 dependency setup to avoid psycopg2-binary on linux - # seems to prevent that dependency from being used even if explicitly added - "dbt-postgres<1.8.0" - ], + "test": [], }, ) diff --git a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran.py b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran.py deleted file mode 100644 index 9446ff76d0ce6..0000000000000 --- a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran.py +++ /dev/null @@ -1,18 +0,0 @@ -import os - -from dagster_fivetran import FivetranResource, load_assets_from_fivetran_instance - -import dagster as dg - -fivetran_assets = load_assets_from_fivetran_instance( - # Connect to your Fivetran instance - FivetranResource( - api_key="some_key", - api_secret=dg.EnvVar("FIVETRAN_SECRET"), - ) -) - - -defs = dg.Definitions( - assets=[fivetran_assets], -) diff --git a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/__init__.py b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/customize_fivetran_asset_defs.py b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/customize_fivetran_asset_defs.py new file mode 100644 index 0000000000000..b195ed0c134c3 --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/customize_fivetran_asset_defs.py @@ -0,0 +1,29 @@ +from dagster_fivetran import FivetranWorkspace, fivetran_assets + +import dagster as dg + +fivetran_workspace = FivetranWorkspace( + account_id=dg.EnvVar("FIVETRAN_ACCOUNT_ID"), + api_key=dg.EnvVar("FIVETRAN_API_KEY"), + api_secret=dg.EnvVar("FIVETRAN_API_SECRET"), +) + + +@fivetran_assets( + connector_id="fivetran_connector_id", + name="fivetran_connector_id", + group_name="fivetran_connector_id", + workspace=fivetran_workspace, +) +def fivetran_connector_assets( + context: dg.AssetExecutionContext, fivetran: FivetranWorkspace +): + # Do something before the materialization... + yield from fivetran.sync_and_poll(context=context) + # Do something after the materialization... 
+ + +defs = dg.Definitions( + assets=[fivetran_connector_assets], + resources={"fivetran": fivetran_workspace}, +) diff --git a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/customize_fivetran_translator_asset_spec.py b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/customize_fivetran_translator_asset_spec.py new file mode 100644 index 0000000000000..8debde35e6eba --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/customize_fivetran_translator_asset_spec.py @@ -0,0 +1,33 @@ +from dagster_fivetran import ( + DagsterFivetranTranslator, + FivetranConnectorTableProps, + FivetranWorkspace, + load_fivetran_asset_specs, +) + +import dagster as dg + +fivetran_workspace = FivetranWorkspace( + account_id=dg.EnvVar("FIVETRAN_ACCOUNT_ID"), + api_key=dg.EnvVar("FIVETRAN_API_KEY"), + api_secret=dg.EnvVar("FIVETRAN_API_SECRET"), +) + + +# A translator class lets us customize properties of the built +# Fivetran assets, such as the owners or asset key +class MyCustomFivetranTranslator(DagsterFivetranTranslator): + def get_asset_spec(self, props: FivetranConnectorTableProps) -> dg.AssetSpec: + # We create the default asset spec using super() + default_spec = super().get_asset_spec(props) + # We customize the metadata and asset key prefix for all assets + return default_spec.replace_attributes( + key=default_spec.key.with_prefix("prefix"), + ).merge_attributes(metadata={"custom": "metadata"}) + + +fivetran_specs = load_fivetran_asset_specs( + fivetran_workspace, dagster_fivetran_translator=MyCustomFivetranTranslator() +) + +defs = dg.Definitions(assets=fivetran_specs, resources={"fivetran": fivetran_workspace}) diff --git a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/multiple_fivetran_workspaces.py b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/multiple_fivetran_workspaces.py new file mode 100644 index 0000000000000..d28203bf3bcdf --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/multiple_fivetran_workspaces.py @@ -0,0 +1,26 @@ +from dagster_fivetran import FivetranWorkspace, load_fivetran_asset_specs + +import dagster as dg + +sales_fivetran_workspace = FivetranWorkspace( + account_id=dg.EnvVar("FIVETRAN_SALES_ACCOUNT_ID"), + api_key=dg.EnvVar("FIVETRAN_SALES_API_KEY"), + api_secret=dg.EnvVar("FIVETRAN_SALES_API_SECRET"), +) +marketing_fivetran_workspace = FivetranWorkspace( + account_id=dg.EnvVar("FIVETRAN_MARKETING_ACCOUNT_ID"), + api_key=dg.EnvVar("FIVETRAN_MARKETING_API_KEY"), + api_secret=dg.EnvVar("FIVETRAN_MARKETING_API_SECRET"), +) + +sales_fivetran_specs = load_fivetran_asset_specs(sales_fivetran_workspace) +marketing_fivetran_specs = load_fivetran_asset_specs(marketing_fivetran_workspace) + +# Merge the specs into a single set of definitions +defs = dg.Definitions( + assets=[*sales_fivetran_specs, *marketing_fivetran_specs], + resources={ + "marketing_fivetran": marketing_fivetran_workspace, + "sales_fivetran": sales_fivetran_workspace, + }, +) diff --git a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/representing_fivetran_assets.py b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/representing_fivetran_assets.py new file mode 100644 index 0000000000000..7637330c175cd --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/representing_fivetran_assets.py @@ -0,0 +1,12 @@ +from dagster_fivetran import FivetranWorkspace, load_fivetran_asset_specs + 
+import dagster as dg + +fivetran_workspace = FivetranWorkspace( + account_id=dg.EnvVar("FIVETRAN_ACCOUNT_ID"), + api_key=dg.EnvVar("FIVETRAN_API_KEY"), + api_secret=dg.EnvVar("FIVETRAN_API_SECRET"), +) + +fivetran_specs = load_fivetran_asset_specs(fivetran_workspace) +defs = dg.Definitions(assets=fivetran_specs, resources={"fivetran": fivetran_workspace}) diff --git a/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/sync_and_materialize_fivetran_assets.py b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/sync_and_materialize_fivetran_assets.py new file mode 100644 index 0000000000000..65a3490f59486 --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/integrations/fivetran/sync_and_materialize_fivetran_assets.py @@ -0,0 +1,16 @@ +from dagster_fivetran import FivetranWorkspace, build_fivetran_assets_definitions + +import dagster as dg + +fivetran_workspace = FivetranWorkspace( + account_id=dg.EnvVar("FIVETRAN_ACCOUNT_ID"), + api_key=dg.EnvVar("FIVETRAN_API_KEY"), + api_secret=dg.EnvVar("FIVETRAN_API_SECRET"), +) + +all_fivetran_assets = build_fivetran_assets_definitions(workspace=fivetran_workspace) + +defs = dg.Definitions( + assets=all_fivetran_assets, + resources={"fivetran": fivetran_workspace}, +) diff --git a/examples/docs_beta_snippets/docs_beta_snippets_tests/test_integration_files_load.py b/examples/docs_beta_snippets/docs_beta_snippets_tests/test_integration_files_load.py index faaf20053a482..89c5039563a98 100644 --- a/examples/docs_beta_snippets/docs_beta_snippets_tests/test_integration_files_load.py +++ b/examples/docs_beta_snippets/docs_beta_snippets_tests/test_integration_files_load.py @@ -19,7 +19,11 @@ f"{snippets_folder}/sdf.py", f"{snippets_folder}/airbyte.py", f"{snippets_folder}/dlt.py", - f"{snippets_folder}/fivetran.py", + f"{snippets_folder}/fivetran/customize_fivetran_asset_defs.py", + f"{snippets_folder}/fivetran/customize_fivetran_translator_asset_spec.py", + f"{snippets_folder}/fivetran/multiple_fivetran_workspaces.py", + f"{snippets_folder}/fivetran/representing_fivetran_assets.py", + f"{snippets_folder}/fivetran/sync_and_materialize_fivetran_assets.py", # FIXME: this breaks on py3.8 and seems related to the non-dagster dependencies f"{snippets_folder}/pandera.py", } diff --git a/examples/docs_beta_snippets/tox.ini b/examples/docs_beta_snippets/tox.ini index 44645d7b1f94e..f294484e22ad2 100644 --- a/examples/docs_beta_snippets/tox.ini +++ b/examples/docs_beta_snippets/tox.ini @@ -12,6 +12,11 @@ install_command = uv pip install {opts} {packages} deps = duckdb plotly + #### + # need deps of dagster-cloud that we need to add since we --no-deps below to avoid reinstalling dagster packages + opentelemetry-api + opentelemetry-sdk + #### -e ../../python_modules/dagster[test] -e ../../python_modules/dagster-pipes -e ../../python_modules/dagster-graphql diff --git a/examples/docs_snippets/docs_snippets/guides/migrations/migrating_airflow_to_dagster.py b/examples/docs_snippets/docs_snippets/guides/migrations/migrating_airflow_to_dagster.py index c546978f13096..e88e8c01e467a 100644 --- a/examples/docs_snippets/docs_snippets/guides/migrations/migrating_airflow_to_dagster.py +++ b/examples/docs_snippets/docs_snippets/guides/migrations/migrating_airflow_to_dagster.py @@ -1,5 +1,6 @@ # ruff: isort: skip_file # ruff: noqa: T201,D415 +# type: ignore # problematic imports in example code def scope_simple_airflow_task(): diff --git 
a/examples/docs_snippets/docs_snippets/integrations/airlift/operator_migration/kubernetes_pod_operator.py b/examples/docs_snippets/docs_snippets/integrations/airlift/operator_migration/kubernetes_pod_operator.py index bde2a3e5b1a86..aeafe9c74b84f 100644 --- a/examples/docs_snippets/docs_snippets/integrations/airlift/operator_migration/kubernetes_pod_operator.py +++ b/examples/docs_snippets/docs_snippets/integrations/airlift/operator_migration/kubernetes_pod_operator.py @@ -1,3 +1,4 @@ +# type: ignore from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator k8s_hello_world = KubernetesPodOperator( diff --git a/examples/experimental/external_assets/airflow_example.py b/examples/experimental/external_assets/airflow_example.py index 9585d06d701f6..b326ea323bfbc 100644 --- a/examples/experimental/external_assets/airflow_example.py +++ b/examples/experimental/external_assets/airflow_example.py @@ -1,3 +1,4 @@ +# type: ignore from airflow import DAG from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator from pendulum import datetime diff --git a/examples/project_atproto_dashboard/.env.example b/examples/project_atproto_dashboard/.env.example new file mode 100644 index 0000000000000..4ea1e239f6bb6 --- /dev/null +++ b/examples/project_atproto_dashboard/.env.example @@ -0,0 +1,17 @@ +AWS_ENDPOINT_URL= +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_BUCKET_NAME= +AWS_ACCOUNT_ID= + +MOTHERDUCK_TOKEN= + +BSKY_LOGIN= +BSKY_APP_PASSWORD= + +DBT_TARGET= + +AZURE_POWERBI_CLIENT_ID= +AZURE_POWERBI_CLIENT_SECRET= +AZURE_POWERBI_TENANT_ID= +AZURE_POWERBI_WORKSPACE_ID= diff --git a/examples/project_atproto_dashboard/.gitignore b/examples/project_atproto_dashboard/.gitignore new file mode 100644 index 0000000000000..ace8bc76e6a41 --- /dev/null +++ b/examples/project_atproto_dashboard/.gitignore @@ -0,0 +1,5 @@ +tmp*/ +storage/ +schedules/ +history/ +atproto-session.txt diff --git a/examples/project_atproto_dashboard/README.md b/examples/project_atproto_dashboard/README.md new file mode 100644 index 0000000000000..4c15d93cc89cf --- /dev/null +++ b/examples/project_atproto_dashboard/README.md @@ -0,0 +1,52 @@ +# project_atproto_dashboard + +An end-to-end demonstration of ingesting data from the ATProto API, modeling it with dbt, and presenting it with Power BI. + +![Architecture Diagram](./architecture-diagram.png) + +![Project asset lineage](./lineage.svg) + +## Features used + +1. Ingestion of data-related Bluesky posts + - Dynamic partitions + - Declarative automation + - Concurrency limits +2. Modeling data using _dbt_ +3. Representing data in a dashboard + +## Getting started + +### Environment Setup + +Ensure the following environment variables have been populated in your `.env` file. Start by copying the +template. + +``` +cp .env.example .env +``` + +And then populate the fields. 
+ +### Development + +Install the project dependencies: + + pip install -e ".[dev]" + +Start Dagster: + + DAGSTER_HOME=$(pwd) dagster dev + +### Unit testing + +Tests are in the `project_atproto_dashboard_tests` directory and you can run tests using `pytest`: + + pytest project_atproto_dashboard_tests + +## Resources + +- https://docs.bsky.app/docs/tutorials/viewing-feeds +- https://docs.bsky.app/docs/advanced-guides/rate-limits +- https://atproto.blue/en/latest/atproto_client/auth.html#session-string +- https://tenacity.readthedocs.io/en/latest/#waiting-before-retrying diff --git a/examples/project_atproto_dashboard/architecture-diagram.png b/examples/project_atproto_dashboard/architecture-diagram.png new file mode 100644 index 0000000000000..af16cc6c57430 Binary files /dev/null and b/examples/project_atproto_dashboard/architecture-diagram.png differ diff --git a/examples/project_atproto_dashboard/dagster.yaml b/examples/project_atproto_dashboard/dagster.yaml new file mode 100644 index 0000000000000..c9705420e83ca --- /dev/null +++ b/examples/project_atproto_dashboard/dagster.yaml @@ -0,0 +1,6 @@ +run_coordinator: + module: dagster.core.run_coordinator + class: QueuedRunCoordinator + +concurrency: + default_op_concurrency_limit: 1 diff --git a/examples/project_atproto_dashboard/dbt_project/.gitignore b/examples/project_atproto_dashboard/dbt_project/.gitignore new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/dbt_project/.sqlfluff b/examples/project_atproto_dashboard/dbt_project/.sqlfluff new file mode 100644 index 0000000000000..6fffb098b0115 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/.sqlfluff @@ -0,0 +1,2 @@ +[sqlfluff:rules:capitalisation.keywords] +capitalisation_policy = upper diff --git a/examples/project_atproto_dashboard/dbt_project/dbt_project.yml b/examples/project_atproto_dashboard/dbt_project/dbt_project.yml new file mode 100644 index 0000000000000..5dc13e8c3997a --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/dbt_project.yml @@ -0,0 +1,13 @@ +name: "dbt_project" +version: "1.0.0" +config-version: 2 + +profile: "bluesky" + +target-path: "target" +clean-targets: + - "target" + - "dbt_packages" + +models: + +materialized: table diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/activity_over_time.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/activity_over_time.sql new file mode 100644 index 0000000000000..794065c8723e7 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/activity_over_time.sql @@ -0,0 +1,14 @@ +WITH final AS ( + SELECT + date_trunc('day', created_at) AS post_date, + count(DISTINCT post_text) AS unique_posts, + count(DISTINCT author_handle) AS active_authors, + sum(likes) AS total_likes, + sum(replies) AS total_comments, + sum(quotes) AS total_quotes + FROM {{ ref("latest_feed") }} + GROUP BY date_trunc('day', created_at) + ORDER BY date_trunc('day', created_at) DESC +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/all_profiles.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/all_profiles.sql new file mode 100644 index 0000000000000..5f4e21734bff1 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/all_profiles.sql @@ -0,0 +1,105 @@ +WITH max_profile_data AS ( + SELECT + json_extract_string(json, '$.subject.did') AS profile_did, + max( + strptime( + regexp_extract( + filename, + 
'dagster-demo/atproto_starter_pack_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) + ) AS max_extracted_timestamp + FROM {{ ref("stg_profiles") }} + GROUP BY + json_extract_string(json, '$.subject.did') +), + +profiles AS ( + SELECT + json_extract_string(json, '$.subject.handle') AS handle_subject, + json_extract_string(json, '$.subject.did') AS profile_did, + json_extract_string(json, '$.subject.avatar') AS profile_avatar, + json_extract_string(json, '$.subject.display_name') + AS profile_display_name, + json_extract_string(json, '$.subject.created_at') + AS profile_created_date, + json_extract_string(json, '$.subject.description') + AS profile_description + FROM {{ ref("stg_profiles") }} stg_prof + JOIN max_profile_data + ON + json_extract_string(stg_prof.json, '$.subject.did') + = max_profile_data.profile_did + AND strptime( + regexp_extract( + stg_prof.filename, + 'dagster-demo/atproto_starter_pack_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) + = max_profile_data.max_extracted_timestamp +), + +user_aggregates AS ( + SELECT + replace(author_handle, '"', '') AS author_handle, + count(*) AS num_posts, + avg(cast(lf.likes AS int)) AS average_likes, + sum(cast(lf.likes AS int)) AS total_likes, + sum(cast(lf.replies AS int)) AS total_replies, + sum(cast(lf.likes AS int)) / count(*) AS total_likes_by_num_of_posts, + round( + count(*) + / count(DISTINCT date_trunc('day', cast(created_at AS timestamp))), + 2 + ) AS avg_posts_per_day, + ntile(100) + OVER ( + ORDER BY sum(cast(lf.likes AS int)) + ) + AS likes_percentile, + ntile(100) + OVER ( + ORDER BY sum(cast(lf.replies AS int)) + ) + AS replies_percentile, + ntile(100) OVER ( + ORDER BY count(*) + ) AS posts_percentile, + (ntile(100) OVER ( + ORDER BY sum(cast(lf.likes AS int))) + ntile(100) OVER ( + ORDER BY sum(cast(lf.replies AS int))) + ntile(100) OVER ( + ORDER BY count(*) + )) + / 3.0 AS avg_score + FROM {{ ref("latest_feed") }} lf + GROUP BY replace(author_handle, '"', '') +), + +final AS ( + SELECT DISTINCT + profiles.handle_subject AS profile_handle, + profiles.profile_did, + profiles.profile_display_name, + profiles.profile_avatar, + profiles.profile_created_date, + profiles.profile_description, + user_aggregates.num_posts, + user_aggregates.average_likes, + user_aggregates.total_likes, + user_aggregates.total_replies, + user_aggregates.total_likes_by_num_of_posts, + user_aggregates.avg_posts_per_day, + user_aggregates.likes_percentile, + user_aggregates.replies_percentile, + user_aggregates.posts_percentile, + user_aggregates.avg_score + FROM profiles + LEFT JOIN user_aggregates + ON user_aggregates.author_handle = profiles.handle_subject +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/calendar.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/calendar.sql new file mode 100644 index 0000000000000..91f1ae0ea62e0 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/calendar.sql @@ -0,0 +1,45 @@ +WITH date_spine AS ( + SELECT CAST(range AS DATE) AS date_key + FROM RANGE( + (SELECT MIN(created_at) FROM {{ ref("latest_feed") }}), + CURRENT_DATE(), + INTERVAL 1 DAY + ) +) + +SELECT + date_key AS date_key, + DAYOFYEAR(date_key) AS day_of_year, + WEEKOFYEAR(date_key) AS week_of_year, + DAYOFWEEK(date_key) AS day_of_week, + ISODOW(date_key) AS iso_day_of_week, + DAYNAME(date_key) AS day_name, + DATE_TRUNC('week', date_key) AS first_day_of_week, + DATE_TRUNC('week', date_key) + 6 AS 
last_day_of_week, + YEAR(date_key) || RIGHT('0' || MONTH(date_key), 2) AS month_key, + MONTH(date_key) AS month_of_year, + DAYOFMONTH(date_key) AS day_of_month, + LEFT(MONTHNAME(date_key), 3) AS month_name_short, + MONTHNAME(date_key) AS month_name, + DATE_TRUNC('month', date_key) AS first_day_of_month, + LAST_DAY(date_key) AS last_day_of_month, + CAST(YEAR(date_key) || QUARTER(date_key) AS INT) AS quarter_key, + QUARTER(date_key) AS quarter_of_year, + CAST(date_key - DATE_TRUNC('Quarter', date_key) + 1 AS INT) + AS day_of_quarter, + ('Q' || QUARTER(date_key)) AS quarter_desc_short, + ('Quarter ' || QUARTER(date_key)) AS quarter_desc, + DATE_TRUNC('quarter', date_key) AS first_day_of_quarter, + LAST_DAY(DATE_TRUNC('quarter', date_key) + INTERVAL 2 MONTH) + AS last_day_of_quarter, + CAST(YEAR(date_key) AS INT) AS year_key, + DATE_TRUNC('Year', date_key) AS first_day_of_year, + DATE_TRUNC('Year', date_key) - 1 + INTERVAL 1 YEAR AS last_day_of_year, + ROW_NUMBER() + OVER ( + PARTITION BY YEAR(date_key), MONTH(date_key), DAYOFWEEK(date_key) + ORDER BY date_key + ) + AS ordinal_weekday_of_month +FROM date_spine +WHERE CAST(YEAR(date_key) AS INT) >= 2020 diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/latest_feed.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/latest_feed.sql new file mode 100644 index 0000000000000..09f0e23ab1855 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/latest_feed.sql @@ -0,0 +1,57 @@ +WITH max_update AS ( + SELECT + max( + strptime( + regexp_extract( + filename, + 'dagster-demo/atproto_actor_feed_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) + ) AS max_extracted_timestamp, + regexp_extract(filename, 'did:(.*?)\.json') AS profile_id + FROM {{ ref("stg_feed_snapshots") }} + GROUP BY + regexp_extract(filename, 'did:(.*?)\.json') +), + +final AS ( + SELECT + json_extract_string(sfs.json, '$.post.author.handle') AS author_handle, + json_extract_string(sfs.json, '$.post.author.did') AS author_id, + cast(sfs.json.post.like_count AS int) AS likes, + cast(sfs.json.post.quote_count AS int) AS quotes, + cast(sfs.json.post.reply_count AS int) AS replies, + json_extract_string(sfs.json, '$.post.record.text') AS post_text, + sfs.json.post.record.embed, + json_extract_string( + sfs.json, '$.post.record.embed.external.description' + ) AS external_embed_description, + json_extract_string(sfs.json, '$.post.record.embed.external.uri') + AS external_embed_link, + sfs.json.post.record.embed.external.thumb AS external_embed_thumbnail, + cast(sfs.json.post.record.created_at AS timestamp) AS created_at, + CASE + WHEN json_extract_string(sfs.json.post.record.embed, '$.images[0].image.ref.link') IS NULL THEN NULL + ELSE concat('https://cdn.bsky.app/img/feed_thumbnail/plain/', json_extract_string(sfs.json, '$.post.author.did') ,'/' ,json_extract_string(sfs.json.post.record.embed, '$.images[0].image.ref.link'), '@jpeg') + END AS image_url, + max_update.max_extracted_timestamp, + max_update.profile_id + FROM {{ ref("stg_feed_snapshots") }} sfs + JOIN max_update + ON + max_update.profile_id + = regexp_extract(sfs.filename, 'did:(.*?)\.json') + AND max_update.max_extracted_timestamp + = strptime( + regexp_extract( + sfs.filename, + 'dagster-demo/atproto_actor_feed_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/schema.yml 
b/examples/project_atproto_dashboard/dbt_project/models/analysis/schema.yml new file mode 100644 index 0000000000000..404d89541a5db --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/schema.yml @@ -0,0 +1,18 @@ +version: 2 + +models: + - name: all_profiles + description: "table showing data for all the profiles posts are collected from and some high level statistics" + columns: + - name: profile_handle + data_tests: + - unique + - not_null + - name: latest_feed + description: "the latest feed of posts" + - name: activity_over_time + description: "daily activity of posts overtime" + - name: top_daily_posts + description: "top posts ranked for a given day" + - name: top_external_links + description: "top external content grouped by type shared in the community" diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/top_daily_posts.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_daily_posts.sql new file mode 100644 index 0000000000000..a53c8435f60c6 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_daily_posts.sql @@ -0,0 +1,46 @@ +WITH distinct_posts AS ( + SELECT DISTINCT ON (author_handle, post_text, date_trunc('day', created_at)) + author_handle, + post_text, + likes, + quotes, + replies, + image_url, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + created_at + FROM {{ ref("latest_feed") }} +), + +scored_posts AS ( + SELECT + *, + (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) AS engagement_score, + date_trunc('day', created_at) AS post_date, + row_number() OVER ( + PARTITION BY date_trunc('day', created_at) + ORDER BY (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) DESC + ) AS daily_rank + FROM distinct_posts +), + +final AS ( + SELECT + post_date, + author_handle, + post_text, + likes, + quotes, + replies, + image_url, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + round(engagement_score, 2) AS engagement_score, + daily_rank + FROM scored_posts + WHERE daily_rank <= 10 +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/top_external_links.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_external_links.sql new file mode 100644 index 0000000000000..5e207b0d664be --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_external_links.sql @@ -0,0 +1,73 @@ +WITH distinct_posts AS ( + SELECT DISTINCT ON (author_handle, post_text, date_trunc('day', created_at)) + author_handle, + post_text, + likes, + quotes, + replies, + created_at, + image_url, + embed, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + CASE + WHEN external_embed_link LIKE '%youtu%' THEN 'YouTube' + WHEN external_embed_link LIKE '%docs%' THEN 'Docs' + WHEN external_embed_link LIKE '%github%' THEN 'GitHub' + WHEN external_embed_link LIKE '%substack%' THEN 'SubStack' + WHEN external_embed_link LIKE '%twitch%' THEN 'Twitch' + WHEN external_embed_link LIKE '%msnbc%' THEN 'News' + WHEN external_embed_link LIKE '%theguardian%' THEN 'News' + WHEN external_embed_link LIKE '%foreignpolicy%' THEN 'News' + WHEN external_embed_link LIKE '%nytimes%' THEN 'News' + WHEN external_embed_link LIKE '%wsj%' THEN 'News' + WHEN external_embed_link LIKE '%bloomberg%' THEN 'News' + WHEN external_embed_link LIKE '%theverge%' THEN 'News' + WHEN external_embed_link LIKE '%cnbc%' THEN 'News' + WHEN external_embed_link LIKE '%.ft.%' 
THEN 'News' + WHEN external_embed_link LIKE '%washingtonpost%' THEN 'News' + WHEN external_embed_link LIKE '%newrepublic%' THEN 'News' + WHEN external_embed_link LIKE '%huffpost%' THEN 'News' + WHEN external_embed_link LIKE '%wired%' THEN 'News' + WHEN external_embed_link LIKE '%medium%' THEN 'Medium' + WHEN external_embed_link LIKE '%reddit%' THEN 'Reddit' + WHEN external_embed_link LIKE '%/blog/%' THEN 'Blog' + ELSE 'Other' + END AS external_link_type + FROM {{ ref("latest_feed") }} + WHERE external_embed_link IS NOT null +), + +scored_posts AS ( + SELECT + *, + (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) AS engagement_score, + date_trunc('day', created_at) AS post_date, + row_number() OVER ( + PARTITION BY date_trunc('day', created_at), external_link_type + ORDER BY (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) DESC + ) AS daily_rank + FROM distinct_posts +), + +final AS ( + SELECT + post_date, + author_handle, + post_text, + likes, + quotes, + replies, + round(engagement_score, 2) AS engagement_score, + daily_rank, + embed, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + external_link_type + FROM scored_posts + WHERE daily_rank <= 10 +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/sources.yml b/examples/project_atproto_dashboard/dbt_project/models/sources.yml new file mode 100644 index 0000000000000..8b9e72a31b2bc --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/sources.yml @@ -0,0 +1,14 @@ +version: 2 + +sources: + - name: r2_bucket + tables: + - name: actor_feed_snapshot + description: "external r2 bucket with json files of actor feeds" + meta: + external_location: "read_ndjson_objects('r2://dagster-demo/atproto_actor_feed_snapshot/**/*.json', filename=true)" + - name: starter_pack_snapshot + description: "external r2 bucket with json files for feed snapshots" + meta: + external_location: "read_ndjson_objects('r2://dagster-demo/atproto_starter_pack_snapshot/**/*.json', filename=true)" + diff --git a/examples/project_atproto_dashboard/dbt_project/models/staging/schema.yml b/examples/project_atproto_dashboard/dbt_project/models/staging/schema.yml new file mode 100644 index 0000000000000..61f4b3d774b18 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/staging/schema.yml @@ -0,0 +1,7 @@ +version: 2 + +models: + - name: stg_profiles + description: "raw data from r2 bucket" + - name: stg_feed_snapshots + description: "raw posts data from r2 bucket" \ No newline at end of file diff --git a/examples/project_atproto_dashboard/dbt_project/models/staging/stg_feed_snapshots.sql b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_feed_snapshots.sql new file mode 100644 index 0000000000000..92674ee054769 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_feed_snapshots.sql @@ -0,0 +1,5 @@ +WITH raw AS ( + SELECT * FROM {{ source('r2_bucket', 'actor_feed_snapshot') }} +) + +SELECT * FROM raw diff --git a/examples/project_atproto_dashboard/dbt_project/models/staging/stg_profiles.sql b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_profiles.sql new file mode 100644 index 0000000000000..7e4eeba113e2c --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_profiles.sql @@ -0,0 +1,5 @@ +WITH raw AS ( + SELECT * FROM {{ source('r2_bucket', 'starter_pack_snapshot') }} +) + +SELECT * FROM raw diff --git a/examples/project_atproto_dashboard/dbt_project/profiles.yml 
b/examples/project_atproto_dashboard/dbt_project/profiles.yml new file mode 100644 index 0000000000000..462a3a6a9e31b --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/profiles.yml @@ -0,0 +1,27 @@ +bluesky: + target: prod + outputs: + dev: + type: duckdb + schema: bluesky_dev + path: "local.duckdb" + threads: 16 + extensions: + - httpfs + settings: + s3_region: "auto" + s3_access_key_id: "{{ env_var('AWS_ACCESS_KEY_ID') }}" + s3_secret_access_key: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}" + s3_endpoint: "{{ env_var('AWS_ENDPOINT_URL') | replace('https://', '') }}" + prod: + type: duckdb + schema: bluesky + path: "md:prod_bluesky?MOTHERDUCK_TOKEN={{ env_var('MOTHERDUCK_TOKEN') }}" + threads: 16 + extensions: + - httpfs + settings: + s3_region: "auto" + s3_access_key_id: "{{ env_var('AWS_ACCESS_KEY_ID') }}" + s3_secret_access_key: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}" + s3_endpoint: "{{ env_var('AWS_ENDPOINT_URL') | replace('https://', '') }}" diff --git a/examples/project_atproto_dashboard/lineage.svg b/examples/project_atproto_dashboard/lineage.svg new file mode 100644 index 0000000000000..578247eaff975 --- /dev/null +++ b/examples/project_atproto_dashboard/lineage.svg @@ -0,0 +1,3 @@ +
[lineage.svg: SVG asset-lineage diagram for the example project. Its text labels list the asset groups (default, staging, analysis, ingestion, reporting) and assets (starter_pack_snapshot, actor_feed_snapshot, stg_profiles, stg_feed_snapshots, latest_feed, all_profiles, calendar, activity_over_time, top_daily_posts, top_external_links, powerbi_bluesky_model, powerbi_bluesky_report) along with their compute kinds (Python, dbt, DuckDB, Power BI).]
\ No newline at end of file diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/__init__.py new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/__init__.py @@ -0,0 +1 @@ + diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/definitions.py new file mode 100644 index 0000000000000..dba89ead146d3 --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/definitions.py @@ -0,0 +1,49 @@ +import dagster as dg +from dagster_powerbi import ( + DagsterPowerBITranslator, + PowerBIServicePrincipal, + PowerBIWorkspace, + load_powerbi_asset_specs, +) +from dagster_powerbi.translator import PowerBIContentData + +power_bi_workspace = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=dg.EnvVar("AZURE_POWERBI_CLIENT_ID"), + client_secret=dg.EnvVar("AZURE_POWERBI_CLIENT_SECRET"), + tenant_id=dg.EnvVar("AZURE_POWERBI_TENANT_ID"), + ), + workspace_id=dg.EnvVar("AZURE_POWERBI_WORKSPACE_ID"), +) + + +class CustomDagsterPowerBITranslator(DagsterPowerBITranslator): + def get_report_spec(self, data: PowerBIContentData) -> dg.AssetSpec: + return ( + super() + .get_report_spec(data) + .replace_attributes( + group_name="reporting", + ) + ) + + def get_semantic_model_spec(self, data: PowerBIContentData) -> dg.AssetSpec: + upsteam_table_deps = [ + dg.AssetKey(table.get("name")) for table in data.properties.get("tables", []) + ] + return ( + super() + .get_semantic_model_spec(data) + .replace_attributes( + group_name="reporting", + deps=upsteam_table_deps, + ) + ) + + +power_bi_specs = load_powerbi_asset_specs( + power_bi_workspace, + dagster_powerbi_translator=CustomDagsterPowerBITranslator, +) + +defs = dg.Definitions(assets=[*power_bi_specs], resources={"power_bi": power_bi_workspace}) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/definitions.py new file mode 100644 index 0000000000000..f2a2b5ebd5f2e --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/definitions.py @@ -0,0 +1,9 @@ +import dagster as dg + +import project_atproto_dashboard.dashboard.definitions as dashboard_definitions +import project_atproto_dashboard.ingestion.definitions as ingestion_definitions +import project_atproto_dashboard.modeling.definitions as modeling_definitions + +defs = dg.Definitions.merge( + ingestion_definitions.defs, modeling_definitions.defs, dashboard_definitions.defs +) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/definitions.py new file mode 100644 index 0000000000000..41b2f2e969c6b --- /dev/null +++ 
b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/definitions.py @@ -0,0 +1,138 @@ +import os +from datetime import datetime + +import dagster as dg +from dagster_aws.s3 import S3Resource + +from project_atproto_dashboard.ingestion.resources import ATProtoResource +from project_atproto_dashboard.ingestion.utils.atproto import ( + get_all_feed_items, + get_all_starter_pack_members, +) + +AWS_BUCKET_NAME = os.environ.get("AWS_BUCKET_NAME", "dagster-demo") + + +atproto_did_dynamic_partition = dg.DynamicPartitionsDefinition(name="atproto_did_dynamic_partition") + + +@dg.asset( + partitions_def=dg.StaticPartitionsDefinition( + partition_keys=[ + "at://did:plc:lc5jzrr425fyah724df3z5ik/app.bsky.graph.starterpack/3l7cddlz5ja24", # https://bsky.app/starter-pack/christiannolan.bsky.social/3l7cddlz5ja24 + ] + ), + automation_condition=dg.AutomationCondition.on_cron("0 0 * * *"), # Midnight + kinds={"python"}, + group_name="ingestion", +) +def starter_pack_snapshot( + context: dg.AssetExecutionContext, + atproto_resource: ATProtoResource, + s3_resource: S3Resource, +) -> dg.MaterializeResult: + """Snapshot of members in a Bluesky starter pack partitioned by starter pack ID and written to S3 storage. + + Args: + context (AssetExecutionContext) Dagster context + atproto_resource (ATProtoResource) Resource for interfacing with atmosphere protocol + s3_resource (S3Resource) Resource for uploading files to S3 storage + + """ + atproto_client = atproto_resource.get_client() + + starter_pack_uri = context.partition_key + + list_items = get_all_starter_pack_members(atproto_client, starter_pack_uri) + + _bytes = os.linesep.join([member.model_dump_json() for member in list_items]).encode("utf-8") + + datetime_now = datetime.now() + object_key = "/".join( + ( + "atproto_starter_pack_snapshot", + datetime_now.strftime("%Y-%m-%d"), + datetime_now.strftime("%H"), + datetime_now.strftime("%M"), + f"{starter_pack_uri}.json", + ) + ) + + s3_resource.get_client().put_object(Body=_bytes, Bucket=AWS_BUCKET_NAME, Key=object_key) + + context.instance.add_dynamic_partitions( + partitions_def_name="atproto_did_dynamic_partition", + partition_keys=[list_item_view.subject.did for list_item_view in list_items], + ) + + return dg.MaterializeResult( + metadata={ + "len_members": len(list_items), + "s3_object_key": object_key, + } + ) + + +@dg.asset( + partitions_def=atproto_did_dynamic_partition, + deps=[dg.AssetDep(starter_pack_snapshot, partition_mapping=dg.AllPartitionMapping())], + automation_condition=dg.AutomationCondition.eager(), + kinds={"python"}, + group_name="ingestion", + op_tags={"dagster/concurrency_key": "ingestion"}, +) +def actor_feed_snapshot( + context: dg.AssetExecutionContext, + atproto_resource: ATProtoResource, + s3_resource: S3Resource, +) -> dg.MaterializeResult: + """Snapshot of full user feed written to S3 storage.""" + client = atproto_resource.get_client() + actor_did = context.partition_key + + # NOTE: we may need to yield chunks to be more memory efficient + items = get_all_feed_items(client, actor_did) + + datetime_now = datetime.now() + + object_key = "/".join( + ( + "atproto_actor_feed_snapshot", + datetime_now.strftime("%Y-%m-%d"), + datetime_now.strftime("%H"), + datetime_now.strftime("%M"), + f"{actor_did}.json", + ) + ) + + _bytes = os.linesep.join([item.model_dump_json() for item in items]).encode("utf-8") + + s3_resource.get_client().put_object(Body=_bytes, Bucket=AWS_BUCKET_NAME, Key=object_key) + + return dg.MaterializeResult( + metadata={ + "len_feed_items": 
len(items), + "s3_object_key": object_key, + } + ) + + +atproto_resource = ATProtoResource( + login=dg.EnvVar("BSKY_LOGIN"), password=dg.EnvVar("BSKY_APP_PASSWORD") +) + +s3_resource = S3Resource( + endpoint_url=dg.EnvVar("AWS_ENDPOINT_URL"), + aws_access_key_id=dg.EnvVar("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=dg.EnvVar("AWS_SECRET_ACCESS_KEY"), + region_name="auto", +) + + +defs = dg.Definitions( + assets=[starter_pack_snapshot, actor_feed_snapshot], + resources={ + "atproto_resource": atproto_resource, + "s3_resource": s3_resource, + }, +) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/resources.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/resources.py new file mode 100644 index 0000000000000..38163e85896df --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/resources.py @@ -0,0 +1,29 @@ +import os + +import dagster as dg +from atproto import Client + + +class ATProtoResource(dg.ConfigurableResource): + login: str + password: str + session_cache_path: str = "atproto-session.txt" + + def _login(self, client): + """Create a reusable session shared across resource instances; session creation is rate limited to 30 per 5 minutes and 300 per day.""" + if os.path.exists(self.session_cache_path): + with open(self.session_cache_path, "r") as f: + session_string = f.read() + client.login(session_string=session_string) + else: + client.login(login=self.login, password=self.password) + session_string = client.export_session_string() + with open(self.session_cache_path, "w") as f: + f.write(session_string) + + def get_client( + self, + ) -> Client: + client = Client() + self._login(client) + return client diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/atproto.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/atproto.py new file mode 100644 index 0000000000000..fe8fadb7e857a --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/atproto.py @@ -0,0 +1,59 @@ +from typing import TYPE_CHECKING, List, Optional + +from atproto import Client + +if TYPE_CHECKING: + from atproto_client import models + + +def get_all_feed_items(client: Client, actor: str) -> List["models.AppBskyFeedDefs.FeedViewPost"]: + """Retrieves all author feed items for a given `actor`. 
+ + Args: + client (Client): AT Protocol client + actor (str): author identifier (DID) + + Returns: + List['models.AppBskyFeedDefs.FeedViewPost']: List of feed items for the actor. + + """ + import math + + import tenacity + + @tenacity.retry( + stop=tenacity.stop_after_attempt(5), + wait=tenacity.wait_fixed(math.ceil(60 * 2.5)), + ) + def _get_feed_with_retries(client: Client, actor: str, cursor: Optional[str]): + return client.get_author_feed(actor=actor, cursor=cursor, limit=100) + + feed = [] + cursor = None + while True: + data = _get_feed_with_retries(client, actor, cursor) + feed.extend(data.feed) + cursor = data.cursor + if not cursor: + break + + return feed + + +def get_all_list_members(client: Client, list_uri: str): + cursor = None + members = [] + while True: + response = client.app.bsky.graph.get_list( + {"list": list_uri, "cursor": cursor, "limit": 100} + ) + members.extend(response.items) + if not response.cursor: + break + cursor = response.cursor + return members + + +def get_all_starter_pack_members(client: Client, starter_pack_uri: str): + response = client.app.bsky.graph.get_starter_pack({"starter_pack": starter_pack_uri}) + return get_all_list_members(client, response.starter_pack.list.uri) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/definitions.py new file mode 100644 index 0000000000000..6e7acf2d55f78 --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/definitions.py @@ -0,0 +1,45 @@ +import os +from pathlib import Path +from typing import Any, Mapping, Optional + +import dagster as dg +from dagster_dbt import DagsterDbtTranslator, DbtCliResource, DbtProject, dbt_assets + +dbt_project = DbtProject( + project_dir=Path(__file__).joinpath("..", "..", "..", "dbt_project").resolve(), + target=os.getenv("DBT_TARGET"), +) +dbt_project.prepare_if_dev() +dbt_resource = DbtCliResource(project_dir=dbt_project) + + +class CustomizedDagsterDbtTranslator(DagsterDbtTranslator): + def get_group_name(self, dbt_resource_props: Mapping[str, Any]) -> Optional[str]: + asset_path = dbt_resource_props["fqn"][1:-1] + if asset_path: + return "_".join(asset_path) + return "default" + + def get_asset_key(self, dbt_resource_props): + resource_type = dbt_resource_props["resource_type"] + name = dbt_resource_props["name"] + if resource_type == "source": + return dg.AssetKey(name) + else: + return super().get_asset_key(dbt_resource_props) + + +@dbt_assets( + manifest=dbt_project.manifest_path, + dagster_dbt_translator=CustomizedDagsterDbtTranslator(), +) +def dbt_bluesky(context: dg.AssetExecutionContext, dbt: DbtCliResource): + yield from (dbt.cli(["build"], context=context).stream().fetch_row_counts()) + + +defs = dg.Definitions( + assets=[dbt_bluesky], + resources={ + "dbt": dbt_resource, + }, +) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard_tests/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/__init__.py new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/__init__.py @@ -0,0 +1 @@ + 
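
Editor's note: the test package added alongside this example is left empty in the diff (see test_assets.py below). As a rough sketch, not part of the committed example, the pagination helper get_all_list_members above could be unit-tested with a stubbed client along the following lines; it assumes the project_atproto_dashboard package is importable in the test environment, and the _FakeGetList stub, the two-page fixture, and the member/list names are illustrative assumptions only.

from types import SimpleNamespace

from project_atproto_dashboard.ingestion.utils.atproto import get_all_list_members


class _FakeGetList:
    """Stub for client.app.bsky.graph.get_list: serves two pages, then omits the cursor."""

    def __init__(self):
        self._pages = [
            SimpleNamespace(items=["member-1", "member-2"], cursor="page-2"),
            SimpleNamespace(items=["member-3"], cursor=None),
        ]

    def __call__(self, params):
        # `params` is the {"list": ..., "cursor": ..., "limit": 100} dict built by the helper.
        return self._pages.pop(0)


def test_get_all_list_members_paginates():
    # Stub only the attribute path the helper touches: client.app.bsky.graph.get_list(...)
    fake_client = SimpleNamespace(
        app=SimpleNamespace(bsky=SimpleNamespace(graph=SimpleNamespace(get_list=_FakeGetList())))
    )
    members = get_all_list_members(fake_client, "at://example/list")
    assert members == ["member-1", "member-2", "member-3"]

diff --git 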
a/examples/project_atproto_dashboard/project_atproto_dashboard_tests/test_assets.py b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/test_assets.py new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/test_assets.py @@ -0,0 +1 @@ + diff --git a/examples/project_atproto_dashboard/pyproject.toml b/examples/project_atproto_dashboard/pyproject.toml new file mode 100644 index 0000000000000..068f8e743f8a5 --- /dev/null +++ b/examples/project_atproto_dashboard/pyproject.toml @@ -0,0 +1,34 @@ +[project] +name = "project_atproto_dashboard" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.9,<3.13" +dependencies = [ + "atproto", + "dagster", + "dagster-aws", + "dagster-dbt", + "dagster-duckdb", + "dagster-powerbi", + "dbt-duckdb", + "tenacity", +] + +[project.optional-dependencies] +dev = [ + "dagster-webserver", + "pytest", + "ruff", +] + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.dagster] +module_name = "project_atproto_dashboard.definitions" +project_name = "project_atproto_dashboard" + +[tool.setuptools.packages.find] +exclude=["project_atproto_dashboard_tests"] diff --git a/examples/starlift-demo/dbt_example/dagster_defs/utils.py b/examples/starlift-demo/dbt_example/dagster_defs/utils.py index ea4fa0740c2e4..1087a7d454573 100644 --- a/examples/starlift-demo/dbt_example/dagster_defs/utils.py +++ b/examples/starlift-demo/dbt_example/dagster_defs/utils.py @@ -2,9 +2,7 @@ from dagster import AssetsDefinition, AssetSpec, AutomationCondition, Definitions, Nothing from dagster._core.definitions.asset_key import AssetKey -from dagster._core.definitions.decorators.decorator_assets_definition_builder import ( - stringify_asset_key_to_input_name, -) +from dagster._core.definitions.assets import stringify_asset_key_to_input_name from dagster._core.definitions.input import In diff --git a/helm/dagster/schema/schema/charts/dagster/subschema/daemon.py b/helm/dagster/schema/schema/charts/dagster/subschema/daemon.py index 5f9a5524f54b8..04cfa9f039e22 100644 --- a/helm/dagster/schema/schema/charts/dagster/subschema/daemon.py +++ b/helm/dagster/schema/schema/charts/dagster/subschema/daemon.py @@ -94,6 +94,7 @@ class Daemon(BaseModel, extra="forbid"): podSecurityContext: kubernetes.PodSecurityContext securityContext: kubernetes.SecurityContext resources: kubernetes.Resources + checkDbReadyInitContainer: Optional[bool] = None livenessProbe: kubernetes.LivenessProbe readinessProbe: kubernetes.ReadinessProbe startupProbe: kubernetes.StartupProbe diff --git a/helm/dagster/schema/schema/charts/dagster/subschema/flower.py b/helm/dagster/schema/schema/charts/dagster/subschema/flower.py index 6742e3c090f4b..f2a93daae60f3 100644 --- a/helm/dagster/schema/schema/charts/dagster/subschema/flower.py +++ b/helm/dagster/schema/schema/charts/dagster/subschema/flower.py @@ -13,6 +13,7 @@ class Flower(BaseModel): tolerations: kubernetes.Tolerations podSecurityContext: kubernetes.PodSecurityContext securityContext: kubernetes.SecurityContext + checkDbReadyInitContainer: Optional[bool] = None resources: kubernetes.Resources livenessProbe: kubernetes.LivenessProbe startupProbe: kubernetes.StartupProbe diff --git a/helm/dagster/schema/schema/charts/dagster/subschema/run_launcher.py b/helm/dagster/schema/schema/charts/dagster/subschema/run_launcher.py index 7b6d9eaaa143f..c73e9132573c2 100644 --- 
a/helm/dagster/schema/schema/charts/dagster/subschema/run_launcher.py +++ b/helm/dagster/schema/schema/charts/dagster/subschema/run_launcher.py @@ -40,6 +40,7 @@ class CeleryK8sRunLauncherConfig(BaseModel): podSecurityContext: kubernetes.PodSecurityContext securityContext: kubernetes.SecurityContext resources: kubernetes.Resources + checkDbReadyInitContainer: Optional[bool] = None livenessProbe: kubernetes.LivenessProbe volumeMounts: List[kubernetes.VolumeMount] volumes: List[kubernetes.Volume] diff --git a/helm/dagster/schema/schema/charts/dagster/subschema/webserver.py b/helm/dagster/schema/schema/charts/dagster/subschema/webserver.py index fda17a8acf6fc..d84ddf8c27178 100644 --- a/helm/dagster/schema/schema/charts/dagster/subschema/webserver.py +++ b/helm/dagster/schema/schema/charts/dagster/subschema/webserver.py @@ -34,6 +34,7 @@ class Webserver(BaseModel, extra="forbid"): tolerations: kubernetes.Tolerations podSecurityContext: kubernetes.PodSecurityContext securityContext: kubernetes.SecurityContext + checkDbReadyInitContainer: Optional[bool] = None resources: kubernetes.Resources readinessProbe: kubernetes.ReadinessProbe livenessProbe: kubernetes.LivenessProbe diff --git a/helm/dagster/schema/schema_tests/test_celery_queues.py b/helm/dagster/schema/schema_tests/test_celery_queues.py index 7b84118c5a941..c9548112b795b 100644 --- a/helm/dagster/schema/schema_tests/test_celery_queues.py +++ b/helm/dagster/schema/schema_tests/test_celery_queues.py @@ -307,3 +307,49 @@ def test_scheduler_name(deployment_template: HelmTemplate): deployment = celery_queue_deployments[0] assert deployment.spec.template.spec.scheduler_name == "custom" + + +def test_check_db_container_toggle(deployment_template: HelmTemplate): + # Off test + helm_values = DagsterHelmValues.construct( + runLauncher=RunLauncher( + type=RunLauncherType.CELERY, + config=RunLauncherConfig( + celeryK8sRunLauncher=CeleryK8sRunLauncherConfig.construct( + checkDbReadyInitContainer=False + ) + ), + ) + ) + [daemon_deployment] = deployment_template.render(helm_values) + assert daemon_deployment.spec.template.spec.init_containers is None or "check-db-ready" not in [ + container.name for container in daemon_deployment.spec.template.spec.init_containers + ] + + # On test + helm_values = DagsterHelmValues.construct( + runLauncher=RunLauncher( + type=RunLauncherType.CELERY, + config=RunLauncherConfig( + celeryK8sRunLauncher=CeleryK8sRunLauncherConfig.construct( + checkDbReadyInitContainer=True + ) + ), + ) + ) + [daemon_deployment] = deployment_template.render(helm_values) + assert "check-db-ready" in [ + container.name for container in daemon_deployment.spec.template.spec.init_containers + ] + + # Default test + helm_values = DagsterHelmValues.construct( + runLauncher=RunLauncher( + type=RunLauncherType.CELERY, + config=RunLauncherConfig(celeryK8sRunLauncher=CeleryK8sRunLauncherConfig.construct()), + ) + ) + [daemon_deployment] = deployment_template.render(helm_values) + assert "check-db-ready" in [ + container.name for container in daemon_deployment.spec.template.spec.init_containers + ] diff --git a/helm/dagster/schema/schema_tests/test_dagit.py b/helm/dagster/schema/schema_tests/test_dagit.py index 85b2c9b9c44ab..67ade02507457 100644 --- a/helm/dagster/schema/schema_tests/test_dagit.py +++ b/helm/dagster/schema/schema_tests/test_dagit.py @@ -596,3 +596,34 @@ def test_env_configmap(configmap_template): assert len(cm.data) == 6 assert cm.data["DAGSTER_HOME"] == "/opt/dagster/dagster_home" assert cm.data["TEST_ENV"] == "test_value" + + +def 
test_check_db_container_toggle(deployment_template: HelmTemplate): + # Off test + helm_values = DagsterHelmValues.construct( + dagsterWebserver=Webserver.construct(checkDbReadyInitContainer=False) + ) + [webserver_deployment] = deployment_template.render(helm_values) + assert ( + webserver_deployment.spec.template.spec.init_containers is None + or "check-db-ready" + not in [ + container.name for container in webserver_deployment.spec.template.spec.init_containers + ] + ) + + # On test + helm_values = DagsterHelmValues.construct( + dagsterWebserver=Webserver.construct(checkDbReadyInitContainer=True) + ) + [webserver_deployment] = deployment_template.render(helm_values) + assert "check-db-ready" in [ + container.name for container in webserver_deployment.spec.template.spec.init_containers + ] + + # Default test + helm_values = DagsterHelmValues.construct(dagsterWebserver=Webserver.construct()) + [webserver_deployment] = deployment_template.render(helm_values) + assert "check-db-ready" in [ + container.name for container in webserver_deployment.spec.template.spec.init_containers + ] diff --git a/helm/dagster/schema/schema_tests/test_dagster_daemon.py b/helm/dagster/schema/schema_tests/test_dagster_daemon.py index eada9cc4d5a5f..00ce79f69c47d 100644 --- a/helm/dagster/schema/schema_tests/test_dagster_daemon.py +++ b/helm/dagster/schema/schema_tests/test_dagster_daemon.py @@ -679,3 +679,30 @@ def test_env_configmap(env_configmap_template): assert len(cm.data) == 6 assert cm.data["DAGSTER_HOME"] == "/opt/dagster/dagster_home" assert cm.data["TEST_ENV"] == "test_value" + + +def test_check_db_container_toggle(template: HelmTemplate): + # Off test + helm_values = DagsterHelmValues.construct( + dagsterDaemon=Daemon.construct(checkDbReadyInitContainer=False) + ) + [daemon_deployment] = template.render(helm_values) + assert daemon_deployment.spec.template.spec.init_containers is None or "check-db-ready" not in [ + container.name for container in daemon_deployment.spec.template.spec.init_containers + ] + + # On test + helm_values = DagsterHelmValues.construct( + dagsterDaemon=Daemon.construct(checkDbReadyInitContainer=True) + ) + [daemon_deployment] = template.render(helm_values) + assert "check-db-ready" in [ + container.name for container in daemon_deployment.spec.template.spec.init_containers + ] + + # Default test + helm_values = DagsterHelmValues.construct(dagsterDaemon=Daemon.construct()) + [daemon_deployment] = template.render(helm_values) + assert "check-db-ready" in [ + container.name for container in daemon_deployment.spec.template.spec.init_containers + ] diff --git a/helm/dagster/templates/deployment-celery-queues.yaml b/helm/dagster/templates/deployment-celery-queues.yaml index bbd8f1649be5a..c2f803376c99f 100644 --- a/helm/dagster/templates/deployment-celery-queues.yaml +++ b/helm/dagster/templates/deployment-celery-queues.yaml @@ -40,12 +40,14 @@ spec: securityContext: {{- toYaml $celeryK8sRunLauncherConfig.podSecurityContext | nindent 8 }} initContainers: + {{- if $celeryK8sRunLauncherConfig.checkDbReadyInitContainer }} - name: check-db-ready image: "{{- $.Values.postgresql.image.repository -}}:{{- $.Values.postgresql.image.tag -}}" imagePullPolicy: "{{- $.Values.postgresql.image.pullPolicy -}}" command: ['sh', '-c', {{ include "dagster.postgresql.pgisready" $ | squote }}] securityContext: {{- toYaml $celeryK8sRunLauncherConfig.securityContext | nindent 12 }} + {{- end }} {{- if $.Values.rabbitmq.enabled }} - name: check-rabbitmq-ready image: {{ include "dagster.externalImage.name" 
$.Values.busybox.image | quote }} diff --git a/helm/dagster/templates/deployment-daemon.yaml b/helm/dagster/templates/deployment-daemon.yaml index b28149be77e10..91b86b5c7ec73 100644 --- a/helm/dagster/templates/deployment-daemon.yaml +++ b/helm/dagster/templates/deployment-daemon.yaml @@ -49,6 +49,7 @@ spec: securityContext: {{- toYaml .Values.dagsterDaemon.podSecurityContext | nindent 8 }} initContainers: + {{- if .Values.dagsterDaemon.checkDbReadyInitContainer }} - name: check-db-ready image: {{ include "dagster.externalImage.name" $.Values.postgresql.image | quote }} imagePullPolicy: "{{- $.Values.postgresql.image.pullPolicy -}}" @@ -57,6 +58,7 @@ spec: {{- toYaml .Values.dagsterDaemon.securityContext | nindent 12 }} resources: {{- toYaml .Values.dagsterDaemon.initContainerResources | nindent 12 }} + {{- end }} {{- if (and $userDeployments.enabled $userDeployments.enableSubchart) }} {{- range $deployment := $userDeployments.deployments }} - name: "init-user-deployment-{{- $deployment.name -}}" diff --git a/helm/dagster/templates/deployment-flower.yaml b/helm/dagster/templates/deployment-flower.yaml index 18dd7b3d6a479..63b9c7d44a515 100644 --- a/helm/dagster/templates/deployment-flower.yaml +++ b/helm/dagster/templates/deployment-flower.yaml @@ -36,12 +36,14 @@ spec: securityContext: {{- toYaml .Values.flower.podSecurityContext | nindent 8 }} initContainers: + {{- if .Values.flower.checkDbReadyInitContainer }} - name: check-db-ready image: "{{- $.Values.postgresql.image.repository -}}:{{- $.Values.postgresql.image.tag -}}" imagePullPolicy: "{{- $.Values.postgresql.image.pullPolicy -}}" command: ['sh', '-c', {{ include "dagster.postgresql.pgisready" . | squote }}] securityContext: {{- toYaml .Values.flower.securityContext | nindent 12 }} + {{- end }} containers: - name: {{ .Chart.Name }} securityContext: diff --git a/helm/dagster/templates/helpers/_deployment-webserver.tpl b/helm/dagster/templates/helpers/_deployment-webserver.tpl index 2c5f649485d61..9427646bf4999 100644 --- a/helm/dagster/templates/helpers/_deployment-webserver.tpl +++ b/helm/dagster/templates/helpers/_deployment-webserver.tpl @@ -47,6 +47,7 @@ spec: securityContext: {{- toYaml $_.Values.dagsterWebserver.podSecurityContext | nindent 8 }} initContainers: + {{- if .Values.dagsterWebserver.checkDbReadyInitContainer }} - name: check-db-ready image: {{ include "dagster.externalImage.name" .Values.postgresql.image | quote }} imagePullPolicy: {{ .Values.postgresql.image.pullPolicy }} @@ -57,6 +58,7 @@ spec: resources: {{- toYaml $_.Values.dagsterWebserver.initContainerResources | nindent 12 }} {{- end }} + {{- end }} {{- if (and $userDeployments.enabled $userDeployments.enableSubchart) }} {{- range $deployment := $userDeployments.deployments }} - name: "init-user-deployment-{{- $deployment.name -}}" diff --git a/helm/dagster/values.schema.json b/helm/dagster/values.schema.json index dcf24a595a95d..eb73d8863ccc7 100644 --- a/helm/dagster/values.schema.json +++ b/helm/dagster/values.schema.json @@ -216,6 +216,18 @@ "resources": { "$ref": "#/$defs/Resources" }, + "checkDbReadyInitContainer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Checkdbreadyinitcontainer" + }, "livenessProbe": { "$ref": "#/$defs/LivenessProbe" }, @@ -647,6 +659,18 @@ "resources": { "$ref": "#/$defs/Resources" }, + "checkDbReadyInitContainer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Checkdbreadyinitcontainer" + }, "livenessProbe": { 
"$ref": "#/$defs/LivenessProbe" }, @@ -837,6 +861,18 @@ "securityContext": { "$ref": "#/$defs/SecurityContext" }, + "checkDbReadyInitContainer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Checkdbreadyinitcontainer" + }, "resources": { "$ref": "#/$defs/Resources" }, @@ -3137,6 +3173,18 @@ "securityContext": { "$ref": "#/$defs/SecurityContext" }, + "checkDbReadyInitContainer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Checkdbreadyinitcontainer" + }, "resources": { "$ref": "#/$defs/Resources" }, diff --git a/helm/dagster/values.yaml b/helm/dagster/values.yaml index ffd28ca0b84d5..3e7c6cb41c5da 100644 --- a/helm/dagster/values.yaml +++ b/helm/dagster/values.yaml @@ -165,6 +165,10 @@ dagsterWebserver: # Configure initContainer resources separately from main container initContainerResources: {} + + # Enable the check-db-ready initContainer + checkDbReadyInitContainer: true + # Override the default K8s scheduler # schedulerName: ~ @@ -689,6 +693,8 @@ runLauncher: # memory: 128Mi resources: {} + # Enable the check-db-ready initContainer + checkDbReadyInitContainer: true # Override the default K8s scheduler # schedulerName: ~ @@ -885,6 +891,8 @@ flower: podSecurityContext: {} securityContext: {} + # Enable the check-db-ready initContainer + checkDbReadyInitContainer: true # Override the default K8s scheduler # schedulerName: ~ @@ -1219,6 +1227,8 @@ dagsterDaemon: # Configure initContainer resources separately from main container initContainerResources: {} + # Enable the check-db-ready initContainer + checkDbReadyInitContainer: true # Override the default K8s scheduler # schedulerName: ~ diff --git a/integration_tests/test_suites/daemon-test-suite/auto_run_reexecution_tests/test_auto_run_reexecution.py b/integration_tests/test_suites/daemon-test-suite/auto_run_reexecution_tests/test_auto_run_reexecution.py index 49c839775fbb4..3b58a6453ea62 100644 --- a/integration_tests/test_suites/daemon-test-suite/auto_run_reexecution_tests/test_auto_run_reexecution.py +++ b/integration_tests/test_suites/daemon-test-suite/auto_run_reexecution_tests/test_auto_run_reexecution.py @@ -14,6 +14,7 @@ AUTO_RETRY_RUN_ID_TAG, MAX_RETRIES_TAG, PARENT_RUN_ID_TAG, + RESUME_RETRY_TAG, RETRY_ON_ASSET_OR_OP_FAILURE_TAG, RETRY_STRATEGY_TAG, ROOT_RUN_ID_TAG, @@ -395,7 +396,15 @@ def test_consume_new_runs_for_automatic_reexecution(instance, workspace_context) assert len(instance.run_coordinator.queue()) == 0 # retries failure - run = create_run(instance, status=DagsterRunStatus.STARTED, tags={MAX_RETRIES_TAG: "2"}) + run = create_run( + instance, + status=DagsterRunStatus.STARTED, + tags={ + MAX_RETRIES_TAG: "2", + RESUME_RETRY_TAG: "true", + RETRY_STRATEGY_TAG: "ALL_STEPS", + }, + ) dagster_event = DagsterEvent( event_type_value=DagsterEventType.PIPELINE_FAILURE.value, job_name="foo", @@ -427,6 +436,10 @@ def test_consume_new_runs_for_automatic_reexecution(instance, workspace_context) run = instance.get_run_by_id(run.run_id) assert run.tags.get(AUTO_RETRY_RUN_ID_TAG) == first_retry.run_id + # retry strategy is copied, "is_resume_retry" is not since the retry strategy is ALL_STEPS + assert RESUME_RETRY_TAG not in first_retry.tags + assert first_retry.tags.get(RETRY_STRATEGY_TAG) == "ALL_STEPS" + # doesn't retry again list( consume_new_runs_for_automatic_reexecution( diff --git a/js_modules/dagster-ui/.gitattributes b/js_modules/dagster-ui/.gitattributes index bf95c09ab463e..9428a42a1d54b 100644 --- 
a/js_modules/dagster-ui/.gitattributes +++ b/js_modules/dagster-ui/.gitattributes @@ -10,3 +10,4 @@ packages/ui-core/client.json linguist-generated=true packages/ui-core/src/asset-selection/generated/* linguist-generated=true packages/ui-core/src/selection/generated/* linguist-generated=true packages/ui-core/src/run-selection/generated/* linguist-generated=true +packages/ui-core/src/op-selection/generated/* linguist-generated=true diff --git a/js_modules/dagster-ui/packages/app-oss/package.json b/js_modules/dagster-ui/packages/app-oss/package.json index f6d3fb76365a6..82cff1e839639 100644 --- a/js_modules/dagster-ui/packages/app-oss/package.json +++ b/js_modules/dagster-ui/packages/app-oss/package.json @@ -14,7 +14,7 @@ "@rive-app/react-canvas": "^3.0.34", "eslint-config-next": "^13.5.3", "graphql": "^16.8.1", - "next": "^14.2.10", + "next": "^14.2.15", "react": "^18.3.1", "react-dom": "^18.3.1", "react-is": "^18.3.1", diff --git a/js_modules/dagster-ui/packages/ui-core/package.json b/js_modules/dagster-ui/packages/ui-core/package.json index af7ba534148f7..ff98bbec75a34 100644 --- a/js_modules/dagster-ui/packages/ui-core/package.json +++ b/js_modules/dagster-ui/packages/ui-core/package.json @@ -17,6 +17,7 @@ "generate-asset-selection": "ts-node -O '{\"module\": \"commonjs\"}' ./src/scripts/generateAssetSelection.ts && eslint src/asset-selection/generated/ --fix -c .eslintrc.js", "generate-selection-autocomplete": "ts-node -O '{\"module\": \"commonjs\"}' ./src/scripts/generateSelection.ts && eslint src/selection/generated/ --fix -c .eslintrc.js", "generate-run-selection": "ts-node -O '{\"module\": \"commonjs\"}' ./src/scripts/generateRunSelection.ts && eslint src/run-selection/generated/ --fix -c .eslintrc.js", + "generate-op-selection": "ts-node -O '{\"module\": \"commonjs\"}' ./src/scripts/generateOpSelection.ts && eslint src/op-selection/generated/ --fix -c .eslintrc.js", "storybook": "storybook dev -p 6006", "build-storybook": "storybook build" }, diff --git a/js_modules/dagster-ui/packages/ui-core/src/app/DefaultFeatureFlags.oss.tsx b/js_modules/dagster-ui/packages/ui-core/src/app/DefaultFeatureFlags.oss.tsx index 63a0632523530..4dab268bccfc8 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/app/DefaultFeatureFlags.oss.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/app/DefaultFeatureFlags.oss.tsx @@ -8,6 +8,9 @@ export const DEFAULT_FEATURE_FLAG_VALUES: Partial> [FeatureFlag.flagAssetSelectionSyntax]: new URLSearchParams(global?.location?.search ?? '').has( 'new-asset-selection-syntax', ), + [FeatureFlag.flagRunSelectionSyntax]: new URLSearchParams(global?.location?.search ?? 
'').has( + 'new-run-selection-syntax', + ), // Flags for tests [FeatureFlag.__TestFlagDefaultTrue]: true, diff --git a/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionInput.oss.tsx b/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionInput.oss.tsx index f3060d7f460f8..ef87451179f9b 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionInput.oss.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionInput.oss.tsx @@ -1,19 +1,15 @@ -import {Colors, Icon, Icons} from '@dagster-io/ui-components'; -import CodeMirror, {Editor, HintFunction} from 'codemirror'; -import {useLayoutEffect, useMemo, useRef} from 'react'; -import styled, {createGlobalStyle, css} from 'styled-components'; +import {Icons} from '@dagster-io/ui-components'; +import {useMemo} from 'react'; +import styled from 'styled-components'; -import {lintAssetSelection} from './AssetSelectionLinter'; import {assertUnreachable} from '../../app/Util'; import {AssetGraphQueryItem} from '../../asset-graph/useAssetGraphData'; -import {useUpdatingRef} from '../../hooks/useUpdatingRef'; -import {createSelectionHint} from '../../selection/SelectionAutoComplete'; -import { - SelectionAutoCompleteInputCSS, - applyStaticSyntaxHighlighting, -} from '../../selection/SelectionAutoCompleteHighlighter'; +import {SelectionAutoCompleteInput, iconStyle} from '../../selection/SelectionAutoCompleteInput'; +import {createSelectionLinter} from '../../selection/createSelectionLinter'; import {placeholderTextForItems} from '../../ui/GraphQueryInput'; import {buildRepoPathForHuman} from '../../workspace/buildRepoAddress'; +import {AssetSelectionLexer} from '../generated/AssetSelectionLexer'; +import {AssetSelectionParser} from '../generated/AssetSelectionParser'; import 'codemirror/addon/edit/closebrackets'; import 'codemirror/lib/codemirror.css'; @@ -32,215 +28,86 @@ interface AssetSelectionInputProps { const FUNCTIONS = ['sinks', 'roots']; export const AssetSelectionInput = ({value, onChange, assets}: AssetSelectionInputProps) => { - const editorRef = useRef(null); - const cmInstance = useRef(null); - - const currentValueRef = useRef(value); - - const hintRef = useUpdatingRef( - useMemo(() => { - const assetNamesSet: Set = new Set(); - const tagNamesSet: Set = new Set(); - const ownersSet: Set = new Set(); - const groupsSet: Set = new Set(); - const kindsSet: Set = new Set(); - const codeLocationSet: Set = new Set(); - - assets.forEach((asset) => { - assetNamesSet.add(asset.name); - asset.node.tags.forEach((tag) => { - if (tag.key && tag.value) { - tagNamesSet.add(`${tag.key}=${tag.value}`); - } else { - tagNamesSet.add(tag.key); - } - }); - asset.node.owners.forEach((owner) => { - switch (owner.__typename) { - case 'TeamAssetOwner': - ownersSet.add(owner.team); - break; - case 'UserAssetOwner': - ownersSet.add(owner.email); - break; - default: - assertUnreachable(owner); - } - }); - if (asset.node.groupName) { - groupsSet.add(asset.node.groupName); + const attributesMap = useMemo(() => { + const assetNamesSet: Set = new Set(); + const tagNamesSet: Set = new Set(); + const ownersSet: Set = new Set(); + const groupsSet: Set = new Set(); + const kindsSet: Set = new Set(); + const codeLocationSet: Set = new Set(); + + assets.forEach((asset) => { + assetNamesSet.add(asset.name); + asset.node.tags.forEach((tag) => { + if (tag.key && tag.value) { + tagNamesSet.add(`${tag.key}=${tag.value}`); + } else { + tagNamesSet.add(tag.key); } - 
asset.node.kinds.forEach((kind) => { - kindsSet.add(kind); - }); - const location = buildRepoPathForHuman( - asset.node.repository.name, - asset.node.repository.location.name, - ); - codeLocationSet.add(location); - }); - - const assetNames = Array.from(assetNamesSet); - const tagNames = Array.from(tagNamesSet); - const owners = Array.from(ownersSet); - const groups = Array.from(groupsSet); - const kinds = Array.from(kindsSet); - const codeLocations = Array.from(codeLocationSet); - - return createSelectionHint( - 'key', - { - key: assetNames, - tag: tagNames, - owner: owners, - group: groups, - kind: kinds, - code_location: codeLocations, - }, - FUNCTIONS, - ); - }, [assets]), - ); - - useLayoutEffect(() => { - if (editorRef.current && !cmInstance.current) { - cmInstance.current = CodeMirror(editorRef.current, { - value, - mode: 'assetSelection', - lineNumbers: false, - lineWrapping: false, - scrollbarStyle: 'native', - autoCloseBrackets: true, - lint: { - getAnnotations: lintAssetSelection, - async: false, - }, - placeholder: placeholderTextForItems('Type an asset subset…', assets), - extraKeys: { - 'Ctrl-Space': 'autocomplete', - Tab: (cm: Editor) => { - cm.replaceSelection(' ', 'end'); - }, - }, }); - - cmInstance.current.setSize('100%', 20); - - // Enforce single line by preventing newlines - cmInstance.current.on('beforeChange', (_instance: Editor, change) => { - if (change.text.some((line) => line.includes('\n'))) { - change.cancel(); - } - }); - - cmInstance.current.on('change', (instance: Editor, change) => { - const newValue = instance.getValue().replace(/\s+/g, ' '); - currentValueRef.current = newValue; - onChange(newValue); - - if (change.origin === 'complete' && change.text[0]?.endsWith('()')) { - // Set cursor inside the right parenthesis - const cursor = instance.getCursor(); - instance.setCursor({...cursor, ch: cursor.ch - 1}); + asset.node.owners.forEach((owner) => { + switch (owner.__typename) { + case 'TeamAssetOwner': + ownersSet.add(owner.team); + break; + case 'UserAssetOwner': + ownersSet.add(owner.email); + break; + default: + assertUnreachable(owner); } }); - - cmInstance.current.on('inputRead', (instance: Editor) => { - showHint(instance, hintRef.current); - }); - - cmInstance.current.on('cursorActivity', (instance: Editor) => { - applyStaticSyntaxHighlighting(instance); - showHint(instance, hintRef.current); - }); - - requestAnimationFrame(() => { - if (!cmInstance.current) { - return; - } - - applyStaticSyntaxHighlighting(cmInstance.current); + if (asset.node.groupName) { + groupsSet.add(asset.node.groupName); + } + asset.node.kinds.forEach((kind) => { + kindsSet.add(kind); }); - } - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - // Update CodeMirror when value prop changes - useLayoutEffect(() => { - const noNewLineValue = value.replace('\n', ' '); - if (cmInstance.current && cmInstance.current.getValue() !== noNewLineValue) { - const instance = cmInstance.current; - const cursor = instance.getCursor(); - instance.setValue(noNewLineValue); - instance.setCursor(cursor); - showHint(instance, hintRef.current); - } - }, [hintRef, value]); + const location = buildRepoPathForHuman( + asset.node.repository.name, + asset.node.repository.location.name, + ); + codeLocationSet.add(location); + }); + const assetNames = Array.from(assetNamesSet); + const tagNames = Array.from(tagNamesSet); + const owners = Array.from(ownersSet); + const groups = Array.from(groupsSet); + const kinds = Array.from(kindsSet); + const codeLocations = 
Array.from(codeLocationSet); + + return { + key: assetNames, + tag: tagNames, + owner: owners, + group: groups, + kind: kinds, + code_location: codeLocations, + }; + }, [assets]); + + const linter = useMemo( + () => createSelectionLinter({Lexer: AssetSelectionLexer, Parser: AssetSelectionParser}), + [], + ); return ( - <> - - - -
- - - + + + ); }; -const iconStyle = (img: string) => css` - &:before { - content: ' '; - width: 14px; - mask-size: contain; - mask-repeat: no-repeat; - mask-position: center; - mask-image: url(${img}); - background: ${Colors.accentPrimary()}; - display: inline-block; - } -`; - -const InputDiv = styled.div` - ${SelectionAutoCompleteInputCSS} +const WrapperDiv = styled.div` .attribute-owner { ${iconStyle(Icons.owner.src)} } `; - -const GlobalHintStyles = createGlobalStyle` - .CodeMirror-hints { - background: ${Colors.popoverBackground()}; - border: none; - border-radius: 4px; - padding: 8px 4px; - .CodeMirror-hint { - border-radius: 4px; - font-size: 14px; - padding: 6px 8px 6px 12px; - color: ${Colors.textDefault()}; - &.CodeMirror-hint-active { - background-color: ${Colors.backgroundBlue()}; - color: ${Colors.textDefault()}; - } - } - } -`; - -function showHint(instance: Editor, hint: HintFunction) { - requestAnimationFrame(() => { - instance.showHint({ - hint, - completeSingle: false, - moveOnOverlap: true, - updateOnCursorActivity: true, - }); - }); -} diff --git a/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionLinter.ts b/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionLinter.ts deleted file mode 100644 index f1f1eb059c23e..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionLinter.ts +++ /dev/null @@ -1,34 +0,0 @@ -import {CharStreams, CommonTokenStream} from 'antlr4ts'; -import CodeMirror from 'codemirror'; - -import {AssetSelectionSyntaxErrorListener} from './AssetSelectionSyntaxErrorListener'; -import {AssetSelectionLexer} from '../generated/AssetSelectionLexer'; -import {AssetSelectionParser} from '../generated/AssetSelectionParser'; - -export const lintAssetSelection = (text: string) => { - const errorListener = new AssetSelectionSyntaxErrorListener(); - - const inputStream = CharStreams.fromString(text); - const lexer = new AssetSelectionLexer(inputStream); - - lexer.removeErrorListeners(); - lexer.addErrorListener(errorListener); - - const tokens = new CommonTokenStream(lexer); - const parser = new AssetSelectionParser(tokens); - - parser.removeErrorListeners(); // Remove default console error listener - parser.addErrorListener(errorListener); - - parser.start(); - - // Map syntax errors to CodeMirror's lint format - const lintErrors = errorListener.errors.map((error) => ({ - message: error.message.replace(', ', ''), - severity: 'error', - from: CodeMirror.Pos(error.line, error.column), - to: CodeMirror.Pos(error.line, text.length), - })); - - return lintErrors; -}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionSyntaxErrorListener.tsx b/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionSyntaxErrorListener.tsx deleted file mode 100644 index 89d87f2dbf09e..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/asset-selection/input/AssetSelectionSyntaxErrorListener.tsx +++ /dev/null @@ -1,26 +0,0 @@ -import {ANTLRErrorListener, RecognitionException, Recognizer} from 'antlr4ts'; - -interface SyntaxError { - message: string; - line: number; - column: number; -} - -export class AssetSelectionSyntaxErrorListener implements ANTLRErrorListener { - public errors: SyntaxError[] = []; - - syntaxError( - _recognizer: Recognizer, - _offendingSymbol: T | undefined, - line: number, - charPositionInLine: number, - msg: string, - _e: RecognitionException | undefined, - ): void { - this.errors.push({ - 
message: msg, - line: line - 1, // CodeMirror lines are 0-based - column: charPositionInLine, - }); - } -} diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/BackfillPreviewModal.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/BackfillPreviewModal.tsx index 31607e4e01b28..f78dda46931d0 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/BackfillPreviewModal.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/BackfillPreviewModal.tsx @@ -57,7 +57,7 @@ export const BackfillPreviewModal = ({ skip: !isOpen, }, ); - const {data} = queryResult; + const {data, loading} = queryResult; const partitionsByAssetToken = useMemo(() => { return Object.fromEntries( @@ -107,8 +107,10 @@ export const BackfillPreviewModal = ({ {partitions ? ( - ) : ( + ) : loading ? ( + ) : ( + 'No partitions available to materialize' )} diff --git a/js_modules/dagster-ui/packages/ui-core/src/gantt/RunGroupPanel.tsx b/js_modules/dagster-ui/packages/ui-core/src/gantt/RunGroupPanel.tsx index 00f35180d3e53..6d65f30ae1767 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/gantt/RunGroupPanel.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/gantt/RunGroupPanel.tsx @@ -96,7 +96,7 @@ export const RunGroupPanel = ({ }); return ( - + <> {runs.map((g, idx) => g ? ( diff --git a/js_modules/dagster-ui/packages/ui-core/src/graph/OpTags.tsx b/js_modules/dagster-ui/packages/ui-core/src/graph/OpTags.tsx index 00ac437c859c8..b87007caaf369 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graph/OpTags.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/graph/OpTags.tsx @@ -62,6 +62,7 @@ import gitlab from './kindtag-images/tool-gitlab-color.svg'; import go from './kindtag-images/tool-go-color.svg'; import google from './kindtag-images/tool-google-color.svg'; import googlecloud from './kindtag-images/tool-googlecloud-color.svg'; +import googledrive from './kindtag-images/tool-googledrive-color.svg'; import googlesheets from './kindtag-images/tool-googlesheets-color.svg'; import graphql from './kindtag-images/tool-graphql-color.svg'; import greatexpectations from './kindtag-images/tool-greatexpectations-color.svg'; @@ -248,6 +249,7 @@ export type KnownTagType = | 'powerbi' | 'gcp' | 'googlecloud' + | 'googledrive' | 'looker' | 'tableau' | 'segment' @@ -580,6 +582,10 @@ export const KNOWN_TAGS: Record = { icon: googlecloud, content: 'Google Cloud', }, + googledrive: { + icon: googledrive, + content: 'Google Drive', + }, looker: { icon: looker, content: 'Looker', diff --git a/js_modules/dagster-ui/packages/ui-core/src/graph/kindtag-images/tool-googledrive-color.svg b/js_modules/dagster-ui/packages/ui-core/src/graph/kindtag-images/tool-googledrive-color.svg new file mode 100644 index 0000000000000..2d94beff46945 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/graph/kindtag-images/tool-googledrive-color.svg @@ -0,0 +1,3 @@ + + + diff --git a/js_modules/dagster-ui/packages/ui-core/src/launchpad/useLaunchMultipleRunsWithTelemetry.ts b/js_modules/dagster-ui/packages/ui-core/src/launchpad/useLaunchMultipleRunsWithTelemetry.ts index d9b7d60528a77..9249da1025e91 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/launchpad/useLaunchMultipleRunsWithTelemetry.ts +++ b/js_modules/dagster-ui/packages/ui-core/src/launchpad/useLaunchMultipleRunsWithTelemetry.ts @@ -29,7 +29,9 @@ export function useLaunchMultipleRunsWithTelemetry() { const executionParamsList = Array.isArray(variables.executionParamsList) ? 
variables.executionParamsList : [variables.executionParamsList]; - const jobNames = executionParamsList.map((params) => params.selector?.jobName); + const jobNames = executionParamsList.map( + (params) => params.selector.jobName || params.selector.pipelineName, + ); if ( jobNames.length !== executionParamsList.length || diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/AntlrOpSelection.ts b/js_modules/dagster-ui/packages/ui-core/src/op-selection/AntlrOpSelection.ts new file mode 100644 index 0000000000000..614bde690e347 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/AntlrOpSelection.ts @@ -0,0 +1,42 @@ +import {CharStreams, CommonTokenStream} from 'antlr4ts'; + +import {AntlrOpSelectionVisitor} from './AntlrOpSelectionVisitor'; +import {GraphQueryItem} from '../app/GraphQueryImpl'; +import {AntlrInputErrorListener} from '../asset-selection/AntlrAssetSelection'; +import {OpSelectionLexer} from './generated/OpSelectionLexer'; +import {OpSelectionParser} from './generated/OpSelectionParser'; + +type OpSelectionQueryResult = { + all: GraphQueryItem[]; + focus: GraphQueryItem[]; +}; + +export const parseOpSelectionQuery = ( + all_ops: GraphQueryItem[], + query: string, +): OpSelectionQueryResult | Error => { + try { + const lexer = new OpSelectionLexer(CharStreams.fromString(query)); + lexer.removeErrorListeners(); + lexer.addErrorListener(new AntlrInputErrorListener()); + + const tokenStream = new CommonTokenStream(lexer); + + const parser = new OpSelectionParser(tokenStream); + parser.removeErrorListeners(); + parser.addErrorListener(new AntlrInputErrorListener()); + + const tree = parser.start(); + + const visitor = new AntlrOpSelectionVisitor(all_ops); + const all_selection = visitor.visit(tree); + const focus_selection = visitor.focus_ops; + + return { + all: Array.from(all_selection), + focus: Array.from(focus_selection), + }; + } catch (e) { + return e as Error; + } +}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/AntlrOpSelectionVisitor.ts b/js_modules/dagster-ui/packages/ui-core/src/op-selection/AntlrOpSelectionVisitor.ts new file mode 100644 index 0000000000000..7af548928dfe6 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/AntlrOpSelectionVisitor.ts @@ -0,0 +1,123 @@ +import {AbstractParseTreeVisitor} from 'antlr4ts/tree/AbstractParseTreeVisitor'; + +import {GraphQueryItem, GraphTraverser} from '../app/GraphQueryImpl'; +import { + AllExpressionContext, + AndExpressionContext, + AttributeExpressionContext, + DownTraversalExpressionContext, + NameExprContext, + NameSubstringExprContext, + NotExpressionContext, + OrExpressionContext, + ParenthesizedExpressionContext, + StartContext, + TraversalAllowedExpressionContext, + UpAndDownTraversalExpressionContext, + UpTraversalExpressionContext, +} from './generated/OpSelectionParser'; +import {OpSelectionVisitor} from './generated/OpSelectionVisitor'; +import {getTraversalDepth, getValue} from '../asset-selection/AntlrAssetSelectionVisitor'; + +export class AntlrOpSelectionVisitor + extends AbstractParseTreeVisitor> + implements OpSelectionVisitor> +{ + all_ops: Set; + focus_ops: Set; + traverser: GraphTraverser; + + protected defaultResult() { + return new Set(); + } + + constructor(all_ops: GraphQueryItem[]) { + super(); + this.all_ops = new Set(all_ops); + this.focus_ops = new Set(); + this.traverser = new GraphTraverser(all_ops); + } + + visitStart(ctx: StartContext) { + return this.visit(ctx.expr()); + } + + 
visitTraversalAllowedExpression(ctx: TraversalAllowedExpressionContext) { + return this.visit(ctx.traversalAllowedExpr()); + } + + visitUpAndDownTraversalExpression(ctx: UpAndDownTraversalExpressionContext) { + const selection = this.visit(ctx.traversalAllowedExpr()); + const up_depth: number = getTraversalDepth(ctx.traversal(0)); + const down_depth: number = getTraversalDepth(ctx.traversal(1)); + const selection_copy = new Set(selection); + for (const item of selection_copy) { + this.traverser.fetchUpstream(item, up_depth).forEach((i) => selection.add(i)); + this.traverser.fetchDownstream(item, down_depth).forEach((i) => selection.add(i)); + } + return selection; + } + + visitUpTraversalExpression(ctx: UpTraversalExpressionContext) { + const selection = this.visit(ctx.traversalAllowedExpr()); + const traversal_depth: number = getTraversalDepth(ctx.traversal()); + const selection_copy = new Set(selection); + for (const item of selection_copy) { + this.traverser.fetchUpstream(item, traversal_depth).forEach((i) => selection.add(i)); + } + return selection; + } + + visitDownTraversalExpression(ctx: DownTraversalExpressionContext) { + const selection = this.visit(ctx.traversalAllowedExpr()); + const traversal_depth: number = getTraversalDepth(ctx.traversal()); + const selection_copy = new Set(selection); + for (const item of selection_copy) { + this.traverser.fetchDownstream(item, traversal_depth).forEach((i) => selection.add(i)); + } + return selection; + } + + visitNotExpression(ctx: NotExpressionContext) { + const selection = this.visit(ctx.expr()); + return new Set([...this.all_ops].filter((i) => !selection.has(i))); + } + + visitAndExpression(ctx: AndExpressionContext) { + const left = this.visit(ctx.expr(0)); + const right = this.visit(ctx.expr(1)); + return new Set([...left].filter((i) => right.has(i))); + } + + visitOrExpression(ctx: OrExpressionContext) { + const left = this.visit(ctx.expr(0)); + const right = this.visit(ctx.expr(1)); + return new Set([...left, ...right]); + } + + visitAllExpression(_ctx: AllExpressionContext) { + return this.all_ops; + } + + visitAttributeExpression(ctx: AttributeExpressionContext) { + return this.visit(ctx.attributeExpr()); + } + + visitParenthesizedExpression(ctx: ParenthesizedExpressionContext) { + return this.visit(ctx.expr()); + } + + visitNameExpr(ctx: NameExprContext) { + const value: string = getValue(ctx.value()); + const selection = [...this.all_ops].filter((i) => i.name === value); + selection.forEach((i) => this.focus_ops.add(i)); + return new Set(selection); + } + + visitNameSubstringExpr(ctx: NameSubstringExprContext) { + const value: string = getValue(ctx.value()); + const selection = [...this.all_ops].filter((i) => i.name.includes(value)); + selection.forEach((i) => this.focus_ops.add(i)); + return new Set(selection); + } +} diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/OpSelection.g4 b/js_modules/dagster-ui/packages/ui-core/src/op-selection/OpSelection.g4 new file mode 100644 index 0000000000000..1edc8b2e04c85 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/OpSelection.g4 @@ -0,0 +1,63 @@ +grammar OpSelection; + +start: expr EOF; + +// Root rule for parsing expressions +expr + : traversalAllowedExpr # TraversalAllowedExpression + | traversal traversalAllowedExpr traversal # UpAndDownTraversalExpression + | traversal traversalAllowedExpr # UpTraversalExpression + | traversalAllowedExpr traversal # DownTraversalExpression + | NOT expr # NotExpression + | expr AND expr # AndExpression + 
| expr OR expr # OrExpression + | STAR # AllExpression + ; + +// Allowed expressions for traversals +traversalAllowedExpr + : attributeExpr # AttributeExpression + | LPAREN expr RPAREN # ParenthesizedExpression + ; + +// Traversal operators +traversal + : STAR + | PLUS+ + ; + +// Attribute expressions for specific attributes +attributeExpr + : NAME COLON value # NameExpr + | NAME_SUBSTRING COLON value # NameSubstringExpr + ; + +// Value can be a quoted or unquoted string +value + : QUOTED_STRING + | UNQUOTED_STRING + ; + +// Tokens for operators and keywords +AND : 'and'; +OR : 'or'; +NOT : 'not'; + +STAR : '*'; +PLUS : '+'; + +COLON : ':'; + +LPAREN : '('; +RPAREN : ')'; + +// Tokens for attributes +NAME : 'name'; +NAME_SUBSTRING : 'name_substring'; + +// Tokens for strings +QUOTED_STRING : '"' (~["\\\r\n])* '"' ; +UNQUOTED_STRING : [a-zA-Z_][a-zA-Z0-9_]*; + +// Whitespace +WS : [ \t\r\n]+ -> skip ; \ No newline at end of file diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/__tests__/AntlrOpSelection.test.ts b/js_modules/dagster-ui/packages/ui-core/src/op-selection/__tests__/AntlrOpSelection.test.ts new file mode 100644 index 0000000000000..8150daefa203d --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/__tests__/AntlrOpSelection.test.ts @@ -0,0 +1,118 @@ +/* eslint-disable jest/expect-expect */ + +import {GraphQueryItem} from '../../app/GraphQueryImpl'; +import {parseOpSelectionQuery} from '../AntlrOpSelection'; + +const TEST_GRAPH: GraphQueryItem[] = [ + // Top Layer + { + name: 'A', + inputs: [{dependsOn: []}], + outputs: [{dependedBy: [{solid: {name: 'B'}}, {solid: {name: 'B2'}}]}], + }, + // Second Layer + { + name: 'B', + inputs: [{dependsOn: [{solid: {name: 'A'}}]}], + outputs: [{dependedBy: [{solid: {name: 'C'}}]}], + }, + { + name: 'B2', + inputs: [{dependsOn: [{solid: {name: 'A'}}]}], + outputs: [{dependedBy: [{solid: {name: 'C'}}]}], + }, + // Third Layer + { + name: 'C', + inputs: [{dependsOn: [{solid: {name: 'B'}}, {solid: {name: 'B2'}}]}], + outputs: [{dependedBy: []}], + }, +]; + +function assertQueryResult(query: string, expectedNames: string[]) { + const result = parseOpSelectionQuery(TEST_GRAPH, query); + expect(result).not.toBeInstanceOf(Error); + if (result instanceof Error) { + throw result; + } + expect(result.all.length).toBe(expectedNames.length); + expect(new Set(result.all.map((op) => op.name))).toEqual(new Set(expectedNames)); +} + +// Most tests copied from AntlrAssetSelection.test.ts +describe('parseOpSelectionQuery', () => { + describe('invalid queries', () => { + it('should throw on invalid queries', () => { + expect(parseOpSelectionQuery(TEST_GRAPH, 'A')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'name:A name:B')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'not')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'and')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'name:A and')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'sinks(*)')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'roots(*)')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'notafunction()')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'tag:foo=')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'owner')).toBeInstanceOf(Error); + expect(parseOpSelectionQuery(TEST_GRAPH, 'owner:owner@owner.com')).toBeInstanceOf(Error); + }); + }); + + describe('valid queries', () 
=> { + it('should parse star query', () => { + assertQueryResult('*', ['A', 'B', 'B2', 'C']); + }); + + it('should parse name query', () => { + assertQueryResult('name:A', ['A']); + }); + + it('should parse name_substring query', () => { + assertQueryResult('name_substring:A', ['A']); + assertQueryResult('name_substring:B', ['B', 'B2']); + }); + + it('should parse and query', () => { + assertQueryResult('name:A and name:B', []); + assertQueryResult('name:A and name:B and name:C', []); + }); + + it('should parse or query', () => { + assertQueryResult('name:A or name:B', ['A', 'B']); + assertQueryResult('name:A or name:B or name:C', ['A', 'B', 'C']); + assertQueryResult('(name:A or name:B) and (name:B or name:C)', ['B']); + }); + + it('should parse upstream plus query', () => { + assertQueryResult('+name:A', ['A']); + assertQueryResult('+name:B', ['A', 'B']); + assertQueryResult('+name:C', ['B', 'B2', 'C']); + assertQueryResult('++name:C', ['A', 'B', 'B2', 'C']); + }); + + it('should parse downstream plus query', () => { + assertQueryResult('name:A+', ['A', 'B', 'B2']); + assertQueryResult('name:A++', ['A', 'B', 'B2', 'C']); + assertQueryResult('name:C+', ['C']); + assertQueryResult('name:B+', ['B', 'C']); + }); + + it('should parse upstream star query', () => { + assertQueryResult('*name:A', ['A']); + assertQueryResult('*name:B', ['A', 'B']); + assertQueryResult('*name:C', ['A', 'B', 'B2', 'C']); + }); + + it('should parse downstream star query', () => { + assertQueryResult('name:A*', ['A', 'B', 'B2', 'C']); + assertQueryResult('name:B*', ['B', 'C']); + assertQueryResult('name:C*', ['C']); + }); + + it('should parse up and down traversal queries', () => { + assertQueryResult('name:A* and *name:C', ['A', 'B', 'B2', 'C']); + assertQueryResult('*name:B*', ['A', 'B', 'C']); + assertQueryResult('name:A* and *name:C and *name:B*', ['A', 'B', 'C']); + assertQueryResult('name:A* and *name:B* and *name:C', ['A', 'B', 'C']); + }); + }); +}); diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelection.interp b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelection.interp new file mode 100644 index 0000000000000..88357962b7070 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelection.interp @@ -0,0 +1,43 @@ +token literal names: +null +'and' +'or' +'not' +'*' +'+' +':' +'(' +')' +'name' +'name_substring' +null +null +null + +token symbolic names: +null +AND +OR +NOT +STAR +PLUS +COLON +LPAREN +RPAREN +NAME +NAME_SUBSTRING +QUOTED_STRING +UNQUOTED_STRING +WS + +rule names: +start +expr +traversalAllowedExpr +traversal +attributeExpr +value + + +atn: +[3, 51485, 51898, 1421, 44986, 20307, 1543, 60043, 49729, 3, 15, 71, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 33, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 41, 10, 3, 12, 3, 14, 3, 44, 11, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 5, 4, 51, 10, 4, 3, 5, 3, 5, 6, 5, 55, 10, 5, 13, 5, 14, 5, 56, 5, 5, 59, 10, 5, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 5, 6, 67, 10, 6, 3, 7, 3, 7, 3, 7, 2, 2, 3, 4, 8, 2, 2, 4, 2, 6, 2, 8, 2, 10, 2, 12, 2, 2, 3, 3, 2, 13, 14, 2, 75, 2, 14, 3, 2, 2, 2, 4, 32, 3, 2, 2, 2, 6, 50, 3, 2, 2, 2, 8, 58, 3, 2, 2, 2, 10, 66, 3, 2, 2, 2, 12, 68, 3, 2, 2, 2, 14, 15, 5, 4, 3, 2, 15, 16, 7, 2, 2, 3, 16, 3, 3, 2, 2, 2, 17, 18, 8, 3, 1, 2, 18, 33, 5, 6, 4, 2, 19, 20, 5, 8, 5, 2, 20, 21, 5, 6, 4, 2, 21, 22, 5, 8, 5, 
2, 22, 33, 3, 2, 2, 2, 23, 24, 5, 8, 5, 2, 24, 25, 5, 6, 4, 2, 25, 33, 3, 2, 2, 2, 26, 27, 5, 6, 4, 2, 27, 28, 5, 8, 5, 2, 28, 33, 3, 2, 2, 2, 29, 30, 7, 5, 2, 2, 30, 33, 5, 4, 3, 6, 31, 33, 7, 6, 2, 2, 32, 17, 3, 2, 2, 2, 32, 19, 3, 2, 2, 2, 32, 23, 3, 2, 2, 2, 32, 26, 3, 2, 2, 2, 32, 29, 3, 2, 2, 2, 32, 31, 3, 2, 2, 2, 33, 42, 3, 2, 2, 2, 34, 35, 12, 5, 2, 2, 35, 36, 7, 3, 2, 2, 36, 41, 5, 4, 3, 6, 37, 38, 12, 4, 2, 2, 38, 39, 7, 4, 2, 2, 39, 41, 5, 4, 3, 5, 40, 34, 3, 2, 2, 2, 40, 37, 3, 2, 2, 2, 41, 44, 3, 2, 2, 2, 42, 40, 3, 2, 2, 2, 42, 43, 3, 2, 2, 2, 43, 5, 3, 2, 2, 2, 44, 42, 3, 2, 2, 2, 45, 51, 5, 10, 6, 2, 46, 47, 7, 9, 2, 2, 47, 48, 5, 4, 3, 2, 48, 49, 7, 10, 2, 2, 49, 51, 3, 2, 2, 2, 50, 45, 3, 2, 2, 2, 50, 46, 3, 2, 2, 2, 51, 7, 3, 2, 2, 2, 52, 59, 7, 6, 2, 2, 53, 55, 7, 7, 2, 2, 54, 53, 3, 2, 2, 2, 55, 56, 3, 2, 2, 2, 56, 54, 3, 2, 2, 2, 56, 57, 3, 2, 2, 2, 57, 59, 3, 2, 2, 2, 58, 52, 3, 2, 2, 2, 58, 54, 3, 2, 2, 2, 59, 9, 3, 2, 2, 2, 60, 61, 7, 11, 2, 2, 61, 62, 7, 8, 2, 2, 62, 67, 5, 12, 7, 2, 63, 64, 7, 12, 2, 2, 64, 65, 7, 8, 2, 2, 65, 67, 5, 12, 7, 2, 66, 60, 3, 2, 2, 2, 66, 63, 3, 2, 2, 2, 67, 11, 3, 2, 2, 2, 68, 69, 9, 2, 2, 2, 69, 13, 3, 2, 2, 2, 9, 32, 40, 42, 50, 56, 58, 66] \ No newline at end of file diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelection.tokens b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelection.tokens new file mode 100644 index 0000000000000..34097166bd470 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelection.tokens @@ -0,0 +1,23 @@ +AND=1 +OR=2 +NOT=3 +STAR=4 +PLUS=5 +COLON=6 +LPAREN=7 +RPAREN=8 +NAME=9 +NAME_SUBSTRING=10 +QUOTED_STRING=11 +UNQUOTED_STRING=12 +WS=13 +'and'=1 +'or'=2 +'not'=3 +'*'=4 +'+'=5 +':'=6 +'('=7 +')'=8 +'name'=9 +'name_substring'=10 diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.interp b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.interp new file mode 100644 index 0000000000000..8828f9b625b8b --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.interp @@ -0,0 +1,56 @@ +token literal names: +null +'and' +'or' +'not' +'*' +'+' +':' +'(' +')' +'name' +'name_substring' +null +null +null + +token symbolic names: +null +AND +OR +NOT +STAR +PLUS +COLON +LPAREN +RPAREN +NAME +NAME_SUBSTRING +QUOTED_STRING +UNQUOTED_STRING +WS + +rule names: +AND +OR +NOT +STAR +PLUS +COLON +LPAREN +RPAREN +NAME +NAME_SUBSTRING +QUOTED_STRING +UNQUOTED_STRING +WS + +channel names: +DEFAULT_TOKEN_CHANNEL +HIDDEN + +mode names: +DEFAULT_MODE + +atn: +[3, 51485, 51898, 1421, 44986, 20307, 1543, 60043, 49729, 2, 15, 93, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 3, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 12, 3, 12, 7, 12, 73, 10, 12, 12, 12, 14, 12, 76, 11, 12, 3, 12, 3, 12, 3, 13, 3, 13, 7, 13, 82, 10, 13, 12, 13, 14, 13, 85, 11, 13, 3, 14, 6, 14, 88, 10, 14, 13, 14, 14, 14, 89, 3, 14, 3, 14, 2, 2, 2, 15, 3, 2, 3, 5, 2, 4, 7, 2, 5, 9, 2, 6, 11, 2, 7, 13, 2, 8, 15, 2, 9, 17, 2, 10, 19, 2, 11, 21, 2, 12, 23, 2, 13, 25, 2, 14, 27, 2, 15, 3, 2, 6, 6, 2, 12, 12, 
15, 15, 36, 36, 94, 94, 5, 2, 67, 92, 97, 97, 99, 124, 6, 2, 50, 59, 67, 92, 97, 97, 99, 124, 5, 2, 11, 12, 15, 15, 34, 34, 2, 95, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 2, 15, 3, 2, 2, 2, 2, 17, 3, 2, 2, 2, 2, 19, 3, 2, 2, 2, 2, 21, 3, 2, 2, 2, 2, 23, 3, 2, 2, 2, 2, 25, 3, 2, 2, 2, 2, 27, 3, 2, 2, 2, 3, 29, 3, 2, 2, 2, 5, 33, 3, 2, 2, 2, 7, 36, 3, 2, 2, 2, 9, 40, 3, 2, 2, 2, 11, 42, 3, 2, 2, 2, 13, 44, 3, 2, 2, 2, 15, 46, 3, 2, 2, 2, 17, 48, 3, 2, 2, 2, 19, 50, 3, 2, 2, 2, 21, 55, 3, 2, 2, 2, 23, 70, 3, 2, 2, 2, 25, 79, 3, 2, 2, 2, 27, 87, 3, 2, 2, 2, 29, 30, 7, 99, 2, 2, 30, 31, 7, 112, 2, 2, 31, 32, 7, 102, 2, 2, 32, 4, 3, 2, 2, 2, 33, 34, 7, 113, 2, 2, 34, 35, 7, 116, 2, 2, 35, 6, 3, 2, 2, 2, 36, 37, 7, 112, 2, 2, 37, 38, 7, 113, 2, 2, 38, 39, 7, 118, 2, 2, 39, 8, 3, 2, 2, 2, 40, 41, 7, 44, 2, 2, 41, 10, 3, 2, 2, 2, 42, 43, 7, 45, 2, 2, 43, 12, 3, 2, 2, 2, 44, 45, 7, 60, 2, 2, 45, 14, 3, 2, 2, 2, 46, 47, 7, 42, 2, 2, 47, 16, 3, 2, 2, 2, 48, 49, 7, 43, 2, 2, 49, 18, 3, 2, 2, 2, 50, 51, 7, 112, 2, 2, 51, 52, 7, 99, 2, 2, 52, 53, 7, 111, 2, 2, 53, 54, 7, 103, 2, 2, 54, 20, 3, 2, 2, 2, 55, 56, 7, 112, 2, 2, 56, 57, 7, 99, 2, 2, 57, 58, 7, 111, 2, 2, 58, 59, 7, 103, 2, 2, 59, 60, 7, 97, 2, 2, 60, 61, 7, 117, 2, 2, 61, 62, 7, 119, 2, 2, 62, 63, 7, 100, 2, 2, 63, 64, 7, 117, 2, 2, 64, 65, 7, 118, 2, 2, 65, 66, 7, 116, 2, 2, 66, 67, 7, 107, 2, 2, 67, 68, 7, 112, 2, 2, 68, 69, 7, 105, 2, 2, 69, 22, 3, 2, 2, 2, 70, 74, 7, 36, 2, 2, 71, 73, 10, 2, 2, 2, 72, 71, 3, 2, 2, 2, 73, 76, 3, 2, 2, 2, 74, 72, 3, 2, 2, 2, 74, 75, 3, 2, 2, 2, 75, 77, 3, 2, 2, 2, 76, 74, 3, 2, 2, 2, 77, 78, 7, 36, 2, 2, 78, 24, 3, 2, 2, 2, 79, 83, 9, 3, 2, 2, 80, 82, 9, 4, 2, 2, 81, 80, 3, 2, 2, 2, 82, 85, 3, 2, 2, 2, 83, 81, 3, 2, 2, 2, 83, 84, 3, 2, 2, 2, 84, 26, 3, 2, 2, 2, 85, 83, 3, 2, 2, 2, 86, 88, 9, 5, 2, 2, 87, 86, 3, 2, 2, 2, 88, 89, 3, 2, 2, 2, 89, 87, 3, 2, 2, 2, 89, 90, 3, 2, 2, 2, 90, 91, 3, 2, 2, 2, 91, 92, 8, 14, 2, 2, 92, 28, 3, 2, 2, 2, 6, 2, 74, 83, 89, 3, 8, 2, 2] \ No newline at end of file diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.tokens b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.tokens new file mode 100644 index 0000000000000..34097166bd470 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.tokens @@ -0,0 +1,23 @@ +AND=1 +OR=2 +NOT=3 +STAR=4 +PLUS=5 +COLON=6 +LPAREN=7 +RPAREN=8 +NAME=9 +NAME_SUBSTRING=10 +QUOTED_STRING=11 +UNQUOTED_STRING=12 +WS=13 +'and'=1 +'or'=2 +'not'=3 +'*'=4 +'+'=5 +':'=6 +'('=7 +')'=8 +'name'=9 +'name_substring'=10 diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.ts b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.ts new file mode 100644 index 0000000000000..3a01b871699a8 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionLexer.ts @@ -0,0 +1,170 @@ +// Generated from /Users/briantu/repos/dagster/js_modules/dagster-ui/packages/ui-core/src/op-selection/OpSelection.g4 by ANTLR 4.9.0-SNAPSHOT + +import {CharStream} from 'antlr4ts/CharStream'; +import {Lexer} from 'antlr4ts/Lexer'; +import {Vocabulary} from 'antlr4ts/Vocabulary'; +import {VocabularyImpl} from 'antlr4ts/VocabularyImpl'; +import {ATN} from 'antlr4ts/atn/ATN'; +import {ATNDeserializer} from 'antlr4ts/atn/ATNDeserializer'; +import {LexerATNSimulator} from 
'antlr4ts/atn/LexerATNSimulator'; +import * as Utils from 'antlr4ts/misc/Utils'; + +export class OpSelectionLexer extends Lexer { + public static readonly AND = 1; + public static readonly OR = 2; + public static readonly NOT = 3; + public static readonly STAR = 4; + public static readonly PLUS = 5; + public static readonly COLON = 6; + public static readonly LPAREN = 7; + public static readonly RPAREN = 8; + public static readonly NAME = 9; + public static readonly NAME_SUBSTRING = 10; + public static readonly QUOTED_STRING = 11; + public static readonly UNQUOTED_STRING = 12; + public static readonly WS = 13; + + // tslint:disable:no-trailing-whitespace + public static readonly channelNames: string[] = ['DEFAULT_TOKEN_CHANNEL', 'HIDDEN']; + + // tslint:disable:no-trailing-whitespace + public static readonly modeNames: string[] = ['DEFAULT_MODE']; + + public static readonly ruleNames: string[] = [ + 'AND', + 'OR', + 'NOT', + 'STAR', + 'PLUS', + 'COLON', + 'LPAREN', + 'RPAREN', + 'NAME', + 'NAME_SUBSTRING', + 'QUOTED_STRING', + 'UNQUOTED_STRING', + 'WS', + ]; + + private static readonly _LITERAL_NAMES: Array = [ + undefined, + "'and'", + "'or'", + "'not'", + "'*'", + "'+'", + "':'", + "'('", + "')'", + "'name'", + "'name_substring'", + ]; + private static readonly _SYMBOLIC_NAMES: Array = [ + undefined, + 'AND', + 'OR', + 'NOT', + 'STAR', + 'PLUS', + 'COLON', + 'LPAREN', + 'RPAREN', + 'NAME', + 'NAME_SUBSTRING', + 'QUOTED_STRING', + 'UNQUOTED_STRING', + 'WS', + ]; + public static readonly VOCABULARY: Vocabulary = new VocabularyImpl( + OpSelectionLexer._LITERAL_NAMES, + OpSelectionLexer._SYMBOLIC_NAMES, + [], + ); + + // @Override + // @NotNull + public get vocabulary(): Vocabulary { + return OpSelectionLexer.VOCABULARY; + } + // tslint:enable:no-trailing-whitespace + + constructor(input: CharStream) { + super(input); + this._interp = new LexerATNSimulator(OpSelectionLexer._ATN, this); + } + + // @Override + public get grammarFileName(): string { + return 'OpSelection.g4'; + } + + // @Override + public get ruleNames(): string[] { + return OpSelectionLexer.ruleNames; + } + + // @Override + public get serializedATN(): string { + return OpSelectionLexer._serializedATN; + } + + // @Override + public get channelNames(): string[] { + return OpSelectionLexer.channelNames; + } + + // @Override + public get modeNames(): string[] { + return OpSelectionLexer.modeNames; + } + + public static readonly _serializedATN: string = + '\x03\uC91D\uCABA\u058D\uAFBA\u4F53\u0607\uEA8B\uC241\x02\x0F]\b\x01\x04' + + '\x02\t\x02\x04\x03\t\x03\x04\x04\t\x04\x04\x05\t\x05\x04\x06\t\x06\x04' + + '\x07\t\x07\x04\b\t\b\x04\t\t\t\x04\n\t\n\x04\v\t\v\x04\f\t\f\x04\r\t\r' + + '\x04\x0E\t\x0E\x03\x02\x03\x02\x03\x02\x03\x02\x03\x03\x03\x03\x03\x03' + + '\x03\x04\x03\x04\x03\x04\x03\x04\x03\x05\x03\x05\x03\x06\x03\x06\x03\x07' + + '\x03\x07\x03\b\x03\b\x03\t\x03\t\x03\n\x03\n\x03\n\x03\n\x03\n\x03\v\x03' + + '\v\x03\v\x03\v\x03\v\x03\v\x03\v\x03\v\x03\v\x03\v\x03\v\x03\v\x03\v\x03' + + '\v\x03\v\x03\f\x03\f\x07\fI\n\f\f\f\x0E\fL\v\f\x03\f\x03\f\x03\r\x03\r' + + '\x07\rR\n\r\f\r\x0E\rU\v\r\x03\x0E\x06\x0EX\n\x0E\r\x0E\x0E\x0EY\x03\x0E' + + '\x03\x0E\x02\x02\x02\x0F\x03\x02\x03\x05\x02\x04\x07\x02\x05\t\x02\x06' + + '\v\x02\x07\r\x02\b\x0F\x02\t\x11\x02\n\x13\x02\v\x15\x02\f\x17\x02\r\x19' + + '\x02\x0E\x1B\x02\x0F\x03\x02\x06\x06\x02\f\f\x0F\x0F$$^^\x05\x02C\\aa' + + 'c|\x06\x022;C\\aac|\x05\x02\v\f\x0F\x0F""\x02_\x02\x03\x03\x02\x02\x02' + + '\x02\x05\x03\x02\x02\x02\x02\x07\x03\x02\x02\x02\x02\t\x03\x02\x02\x02' + + 
'\x02\v\x03\x02\x02\x02\x02\r\x03\x02\x02\x02\x02\x0F\x03\x02\x02\x02\x02' + + '\x11\x03\x02\x02\x02\x02\x13\x03\x02\x02\x02\x02\x15\x03\x02\x02\x02\x02' + + '\x17\x03\x02\x02\x02\x02\x19\x03\x02\x02\x02\x02\x1B\x03\x02\x02\x02\x03' + + '\x1D\x03\x02\x02\x02\x05!\x03\x02\x02\x02\x07$\x03\x02\x02\x02\t(\x03' + + '\x02\x02\x02\v*\x03\x02\x02\x02\r,\x03\x02\x02\x02\x0F.\x03\x02\x02\x02' + + '\x110\x03\x02\x02\x02\x132\x03\x02\x02\x02\x157\x03\x02\x02\x02\x17F\x03' + + '\x02\x02\x02\x19O\x03\x02\x02\x02\x1BW\x03\x02\x02\x02\x1D\x1E\x07c\x02' + + '\x02\x1E\x1F\x07p\x02\x02\x1F \x07f\x02\x02 \x04\x03\x02\x02\x02!"\x07' + + 'q\x02\x02"#\x07t\x02\x02#\x06\x03\x02\x02\x02$%\x07p\x02\x02%&\x07q\x02' + + "\x02&'\x07v\x02\x02'\b\x03\x02\x02\x02()\x07,\x02\x02)\n\x03\x02\x02" + + '\x02*+\x07-\x02\x02+\f\x03\x02\x02\x02,-\x07<\x02\x02-\x0E\x03\x02\x02' + + '\x02./\x07*\x02\x02/\x10\x03\x02\x02\x0201\x07+\x02\x021\x12\x03\x02\x02' + + '\x0223\x07p\x02\x0234\x07c\x02\x0245\x07o\x02\x0256\x07g\x02\x026\x14' + + '\x03\x02\x02\x0278\x07p\x02\x0289\x07c\x02\x029:\x07o\x02\x02:;\x07g\x02' + + '\x02;<\x07a\x02\x02<=\x07u\x02\x02=>\x07w\x02\x02>?\x07d\x02\x02?@\x07' + + 'u\x02\x02@A\x07v\x02\x02AB\x07t\x02\x02BC\x07k\x02\x02CD\x07p\x02\x02' + + 'DE\x07i\x02\x02E\x16\x03\x02\x02\x02FJ\x07$\x02\x02GI\n\x02\x02\x02HG' + + '\x03\x02\x02\x02IL\x03\x02\x02\x02JH\x03\x02\x02\x02JK\x03\x02\x02\x02' + + 'KM\x03\x02\x02\x02LJ\x03\x02\x02\x02MN\x07$\x02\x02N\x18\x03\x02\x02\x02' + + 'OS\t\x03\x02\x02PR\t\x04\x02\x02QP\x03\x02\x02\x02RU\x03\x02\x02\x02S' + + 'Q\x03\x02\x02\x02ST\x03\x02\x02\x02T\x1A\x03\x02\x02\x02US\x03\x02\x02' + + '\x02VX\t\x05\x02\x02WV\x03\x02\x02\x02XY\x03\x02\x02\x02YW\x03\x02\x02' + + '\x02YZ\x03\x02\x02\x02Z[\x03\x02\x02\x02[\\\b\x0E\x02\x02\\\x1C\x03\x02' + + '\x02\x02\x06\x02JSY\x03\b\x02\x02'; + public static __ATN: ATN; + public static get _ATN(): ATN { + if (!OpSelectionLexer.__ATN) { + OpSelectionLexer.__ATN = new ATNDeserializer().deserialize( + Utils.toCharArray(OpSelectionLexer._serializedATN), + ); + } + + return OpSelectionLexer.__ATN; + } +} diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionListener.ts b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionListener.ts new file mode 100644 index 0000000000000..5c13bdbb60747 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionListener.ts @@ -0,0 +1,252 @@ +// Generated from /Users/briantu/repos/dagster/js_modules/dagster-ui/packages/ui-core/src/op-selection/OpSelection.g4 by ANTLR 4.9.0-SNAPSHOT + +import {ParseTreeListener} from 'antlr4ts/tree/ParseTreeListener'; + +import { + AllExpressionContext, + AndExpressionContext, + AttributeExprContext, + AttributeExpressionContext, + DownTraversalExpressionContext, + ExprContext, + NameExprContext, + NameSubstringExprContext, + NotExpressionContext, + OrExpressionContext, + ParenthesizedExpressionContext, + StartContext, + TraversalAllowedExprContext, + TraversalAllowedExpressionContext, + TraversalContext, + UpAndDownTraversalExpressionContext, + UpTraversalExpressionContext, + ValueContext, +} from './OpSelectionParser'; + +/** + * This interface defines a complete listener for a parse tree produced by + * `OpSelectionParser`. + */ +export interface OpSelectionListener extends ParseTreeListener { + /** + * Enter a parse tree produced by the `TraversalAllowedExpression` + * labeled alternative in `OpSelectionParser.expr`. 
+ * @param ctx the parse tree + */ + enterTraversalAllowedExpression?: (ctx: TraversalAllowedExpressionContext) => void; + /** + * Exit a parse tree produced by the `TraversalAllowedExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitTraversalAllowedExpression?: (ctx: TraversalAllowedExpressionContext) => void; + + /** + * Enter a parse tree produced by the `UpAndDownTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterUpAndDownTraversalExpression?: (ctx: UpAndDownTraversalExpressionContext) => void; + /** + * Exit a parse tree produced by the `UpAndDownTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitUpAndDownTraversalExpression?: (ctx: UpAndDownTraversalExpressionContext) => void; + + /** + * Enter a parse tree produced by the `UpTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterUpTraversalExpression?: (ctx: UpTraversalExpressionContext) => void; + /** + * Exit a parse tree produced by the `UpTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitUpTraversalExpression?: (ctx: UpTraversalExpressionContext) => void; + + /** + * Enter a parse tree produced by the `DownTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterDownTraversalExpression?: (ctx: DownTraversalExpressionContext) => void; + /** + * Exit a parse tree produced by the `DownTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitDownTraversalExpression?: (ctx: DownTraversalExpressionContext) => void; + + /** + * Enter a parse tree produced by the `NotExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterNotExpression?: (ctx: NotExpressionContext) => void; + /** + * Exit a parse tree produced by the `NotExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitNotExpression?: (ctx: NotExpressionContext) => void; + + /** + * Enter a parse tree produced by the `AndExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterAndExpression?: (ctx: AndExpressionContext) => void; + /** + * Exit a parse tree produced by the `AndExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitAndExpression?: (ctx: AndExpressionContext) => void; + + /** + * Enter a parse tree produced by the `OrExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterOrExpression?: (ctx: OrExpressionContext) => void; + /** + * Exit a parse tree produced by the `OrExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitOrExpression?: (ctx: OrExpressionContext) => void; + + /** + * Enter a parse tree produced by the `AllExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterAllExpression?: (ctx: AllExpressionContext) => void; + /** + * Exit a parse tree produced by the `AllExpression` + * labeled alternative in `OpSelectionParser.expr`. 
+ * @param ctx the parse tree + */ + exitAllExpression?: (ctx: AllExpressionContext) => void; + + /** + * Enter a parse tree produced by the `NameExpr` + * labeled alternative in `OpSelectionParser.attributeExpr`. + * @param ctx the parse tree + */ + enterNameExpr?: (ctx: NameExprContext) => void; + /** + * Exit a parse tree produced by the `NameExpr` + * labeled alternative in `OpSelectionParser.attributeExpr`. + * @param ctx the parse tree + */ + exitNameExpr?: (ctx: NameExprContext) => void; + + /** + * Enter a parse tree produced by the `NameSubstringExpr` + * labeled alternative in `OpSelectionParser.attributeExpr`. + * @param ctx the parse tree + */ + enterNameSubstringExpr?: (ctx: NameSubstringExprContext) => void; + /** + * Exit a parse tree produced by the `NameSubstringExpr` + * labeled alternative in `OpSelectionParser.attributeExpr`. + * @param ctx the parse tree + */ + exitNameSubstringExpr?: (ctx: NameSubstringExprContext) => void; + + /** + * Enter a parse tree produced by the `AttributeExpression` + * labeled alternative in `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + */ + enterAttributeExpression?: (ctx: AttributeExpressionContext) => void; + /** + * Exit a parse tree produced by the `AttributeExpression` + * labeled alternative in `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + */ + exitAttributeExpression?: (ctx: AttributeExpressionContext) => void; + + /** + * Enter a parse tree produced by the `ParenthesizedExpression` + * labeled alternative in `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + */ + enterParenthesizedExpression?: (ctx: ParenthesizedExpressionContext) => void; + /** + * Exit a parse tree produced by the `ParenthesizedExpression` + * labeled alternative in `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + */ + exitParenthesizedExpression?: (ctx: ParenthesizedExpressionContext) => void; + + /** + * Enter a parse tree produced by `OpSelectionParser.start`. + * @param ctx the parse tree + */ + enterStart?: (ctx: StartContext) => void; + /** + * Exit a parse tree produced by `OpSelectionParser.start`. + * @param ctx the parse tree + */ + exitStart?: (ctx: StartContext) => void; + + /** + * Enter a parse tree produced by `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + enterExpr?: (ctx: ExprContext) => void; + /** + * Exit a parse tree produced by `OpSelectionParser.expr`. + * @param ctx the parse tree + */ + exitExpr?: (ctx: ExprContext) => void; + + /** + * Enter a parse tree produced by `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + */ + enterTraversalAllowedExpr?: (ctx: TraversalAllowedExprContext) => void; + /** + * Exit a parse tree produced by `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + */ + exitTraversalAllowedExpr?: (ctx: TraversalAllowedExprContext) => void; + + /** + * Enter a parse tree produced by `OpSelectionParser.traversal`. + * @param ctx the parse tree + */ + enterTraversal?: (ctx: TraversalContext) => void; + /** + * Exit a parse tree produced by `OpSelectionParser.traversal`. + * @param ctx the parse tree + */ + exitTraversal?: (ctx: TraversalContext) => void; + + /** + * Enter a parse tree produced by `OpSelectionParser.attributeExpr`. + * @param ctx the parse tree + */ + enterAttributeExpr?: (ctx: AttributeExprContext) => void; + /** + * Exit a parse tree produced by `OpSelectionParser.attributeExpr`. 
+ * @param ctx the parse tree + */ + exitAttributeExpr?: (ctx: AttributeExprContext) => void; + + /** + * Enter a parse tree produced by `OpSelectionParser.value`. + * @param ctx the parse tree + */ + enterValue?: (ctx: ValueContext) => void; + /** + * Exit a parse tree produced by `OpSelectionParser.value`. + * @param ctx the parse tree + */ + exitValue?: (ctx: ValueContext) => void; +} diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionParser.ts b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionParser.ts new file mode 100644 index 0000000000000..a9c1c99063b14 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionParser.ts @@ -0,0 +1,1115 @@ +// Generated from /Users/briantu/repos/dagster/js_modules/dagster-ui/packages/ui-core/src/op-selection/OpSelection.g4 by ANTLR 4.9.0-SNAPSHOT + +import {FailedPredicateException} from 'antlr4ts/FailedPredicateException'; +import {NoViableAltException} from 'antlr4ts/NoViableAltException'; +import {Parser} from 'antlr4ts/Parser'; +import {ParserRuleContext} from 'antlr4ts/ParserRuleContext'; +import {RecognitionException} from 'antlr4ts/RecognitionException'; +import {RuleContext} from 'antlr4ts/RuleContext'; +//import { RuleVersion } from "antlr4ts/RuleVersion"; +import {Token} from 'antlr4ts/Token'; +import {TokenStream} from 'antlr4ts/TokenStream'; +import {Vocabulary} from 'antlr4ts/Vocabulary'; +import {VocabularyImpl} from 'antlr4ts/VocabularyImpl'; +import {ATN} from 'antlr4ts/atn/ATN'; +import {ATNDeserializer} from 'antlr4ts/atn/ATNDeserializer'; +import {ParserATNSimulator} from 'antlr4ts/atn/ParserATNSimulator'; +import * as Utils from 'antlr4ts/misc/Utils'; +import {TerminalNode} from 'antlr4ts/tree/TerminalNode'; + +import {OpSelectionListener} from './OpSelectionListener'; +import {OpSelectionVisitor} from './OpSelectionVisitor'; + +export class OpSelectionParser extends Parser { + public static readonly AND = 1; + public static readonly OR = 2; + public static readonly NOT = 3; + public static readonly STAR = 4; + public static readonly PLUS = 5; + public static readonly COLON = 6; + public static readonly LPAREN = 7; + public static readonly RPAREN = 8; + public static readonly NAME = 9; + public static readonly NAME_SUBSTRING = 10; + public static readonly QUOTED_STRING = 11; + public static readonly UNQUOTED_STRING = 12; + public static readonly WS = 13; + public static readonly RULE_start = 0; + public static readonly RULE_expr = 1; + public static readonly RULE_traversalAllowedExpr = 2; + public static readonly RULE_traversal = 3; + public static readonly RULE_attributeExpr = 4; + public static readonly RULE_value = 5; + // tslint:disable:no-trailing-whitespace + public static readonly ruleNames: string[] = [ + 'start', + 'expr', + 'traversalAllowedExpr', + 'traversal', + 'attributeExpr', + 'value', + ]; + + private static readonly _LITERAL_NAMES: Array = [ + undefined, + "'and'", + "'or'", + "'not'", + "'*'", + "'+'", + "':'", + "'('", + "')'", + "'name'", + "'name_substring'", + ]; + private static readonly _SYMBOLIC_NAMES: Array = [ + undefined, + 'AND', + 'OR', + 'NOT', + 'STAR', + 'PLUS', + 'COLON', + 'LPAREN', + 'RPAREN', + 'NAME', + 'NAME_SUBSTRING', + 'QUOTED_STRING', + 'UNQUOTED_STRING', + 'WS', + ]; + public static readonly VOCABULARY: Vocabulary = new VocabularyImpl( + OpSelectionParser._LITERAL_NAMES, + OpSelectionParser._SYMBOLIC_NAMES, + [], + ); + + // @Override + // @NotNull + public get vocabulary(): 
Vocabulary { + return OpSelectionParser.VOCABULARY; + } + // tslint:enable:no-trailing-whitespace + + // @Override + public get grammarFileName(): string { + return 'OpSelection.g4'; + } + + // @Override + public get ruleNames(): string[] { + return OpSelectionParser.ruleNames; + } + + // @Override + public get serializedATN(): string { + return OpSelectionParser._serializedATN; + } + + protected createFailedPredicateException( + predicate?: string, + message?: string, + ): FailedPredicateException { + return new FailedPredicateException(this, predicate, message); + } + + constructor(input: TokenStream) { + super(input); + this._interp = new ParserATNSimulator(OpSelectionParser._ATN, this); + } + // @RuleVersion(0) + public start(): StartContext { + const _localctx: StartContext = new StartContext(this._ctx, this.state); + this.enterRule(_localctx, 0, OpSelectionParser.RULE_start); + try { + this.enterOuterAlt(_localctx, 1); + { + this.state = 12; + this.expr(0); + this.state = 13; + this.match(OpSelectionParser.EOF); + } + } catch (re) { + if (re instanceof RecognitionException) { + _localctx.exception = re; + this._errHandler.reportError(this, re); + this._errHandler.recover(this, re); + } else { + throw re; + } + } finally { + this.exitRule(); + } + return _localctx; + } + + public expr(): ExprContext; + public expr(_p: number): ExprContext; + // @RuleVersion(0) + public expr(_p?: number): ExprContext { + if (_p === undefined) { + _p = 0; + } + + const _parentctx: ParserRuleContext = this._ctx; + const _parentState: number = this.state; + let _localctx: ExprContext = new ExprContext(this._ctx, _parentState); + let _prevctx: ExprContext = _localctx; + const _startState: number = 2; + this.enterRecursionRule(_localctx, 2, OpSelectionParser.RULE_expr, _p); + try { + let _alt: number; + this.enterOuterAlt(_localctx, 1); + { + this.state = 30; + this._errHandler.sync(this); + switch (this.interpreter.adaptivePredict(this._input, 0, this._ctx)) { + case 1: + { + _localctx = new TraversalAllowedExpressionContext(_localctx); + this._ctx = _localctx; + _prevctx = _localctx; + + this.state = 16; + this.traversalAllowedExpr(); + } + break; + + case 2: + { + _localctx = new UpAndDownTraversalExpressionContext(_localctx); + this._ctx = _localctx; + _prevctx = _localctx; + this.state = 17; + this.traversal(); + this.state = 18; + this.traversalAllowedExpr(); + this.state = 19; + this.traversal(); + } + break; + + case 3: + { + _localctx = new UpTraversalExpressionContext(_localctx); + this._ctx = _localctx; + _prevctx = _localctx; + this.state = 21; + this.traversal(); + this.state = 22; + this.traversalAllowedExpr(); + } + break; + + case 4: + { + _localctx = new DownTraversalExpressionContext(_localctx); + this._ctx = _localctx; + _prevctx = _localctx; + this.state = 24; + this.traversalAllowedExpr(); + this.state = 25; + this.traversal(); + } + break; + + case 5: + { + _localctx = new NotExpressionContext(_localctx); + this._ctx = _localctx; + _prevctx = _localctx; + this.state = 27; + this.match(OpSelectionParser.NOT); + this.state = 28; + this.expr(4); + } + break; + + case 6: + { + _localctx = new AllExpressionContext(_localctx); + this._ctx = _localctx; + _prevctx = _localctx; + this.state = 29; + this.match(OpSelectionParser.STAR); + } + break; + } + this._ctx._stop = this._input.tryLT(-1); + this.state = 40; + this._errHandler.sync(this); + _alt = this.interpreter.adaptivePredict(this._input, 2, this._ctx); + while (_alt !== 2 && _alt !== ATN.INVALID_ALT_NUMBER) { + if (_alt === 1) { + if 
(this._parseListeners != null) { + this.triggerExitRuleEvent(); + } + _prevctx = _localctx; + { + this.state = 38; + this._errHandler.sync(this); + switch (this.interpreter.adaptivePredict(this._input, 1, this._ctx)) { + case 1: + { + _localctx = new AndExpressionContext(new ExprContext(_parentctx, _parentState)); + this.pushNewRecursionContext( + _localctx, + _startState, + OpSelectionParser.RULE_expr, + ); + this.state = 32; + if (!this.precpred(this._ctx, 3)) { + throw this.createFailedPredicateException('this.precpred(this._ctx, 3)'); + } + this.state = 33; + this.match(OpSelectionParser.AND); + this.state = 34; + this.expr(4); + } + break; + + case 2: + { + _localctx = new OrExpressionContext(new ExprContext(_parentctx, _parentState)); + this.pushNewRecursionContext( + _localctx, + _startState, + OpSelectionParser.RULE_expr, + ); + this.state = 35; + if (!this.precpred(this._ctx, 2)) { + throw this.createFailedPredicateException('this.precpred(this._ctx, 2)'); + } + this.state = 36; + this.match(OpSelectionParser.OR); + this.state = 37; + this.expr(3); + } + break; + } + } + } + this.state = 42; + this._errHandler.sync(this); + _alt = this.interpreter.adaptivePredict(this._input, 2, this._ctx); + } + } + } catch (re) { + if (re instanceof RecognitionException) { + _localctx.exception = re; + this._errHandler.reportError(this, re); + this._errHandler.recover(this, re); + } else { + throw re; + } + } finally { + this.unrollRecursionContexts(_parentctx); + } + return _localctx; + } + // @RuleVersion(0) + public traversalAllowedExpr(): TraversalAllowedExprContext { + let _localctx: TraversalAllowedExprContext = new TraversalAllowedExprContext( + this._ctx, + this.state, + ); + this.enterRule(_localctx, 4, OpSelectionParser.RULE_traversalAllowedExpr); + try { + this.state = 48; + this._errHandler.sync(this); + switch (this._input.LA(1)) { + case OpSelectionParser.NAME: + case OpSelectionParser.NAME_SUBSTRING: + _localctx = new AttributeExpressionContext(_localctx); + this.enterOuterAlt(_localctx, 1); + { + this.state = 43; + this.attributeExpr(); + } + break; + case OpSelectionParser.LPAREN: + _localctx = new ParenthesizedExpressionContext(_localctx); + this.enterOuterAlt(_localctx, 2); + { + this.state = 44; + this.match(OpSelectionParser.LPAREN); + this.state = 45; + this.expr(0); + this.state = 46; + this.match(OpSelectionParser.RPAREN); + } + break; + default: + throw new NoViableAltException(this); + } + } catch (re) { + if (re instanceof RecognitionException) { + _localctx.exception = re; + this._errHandler.reportError(this, re); + this._errHandler.recover(this, re); + } else { + throw re; + } + } finally { + this.exitRule(); + } + return _localctx; + } + // @RuleVersion(0) + public traversal(): TraversalContext { + const _localctx: TraversalContext = new TraversalContext(this._ctx, this.state); + this.enterRule(_localctx, 6, OpSelectionParser.RULE_traversal); + try { + let _alt: number; + this.state = 56; + this._errHandler.sync(this); + switch (this._input.LA(1)) { + case OpSelectionParser.STAR: + this.enterOuterAlt(_localctx, 1); + { + this.state = 50; + this.match(OpSelectionParser.STAR); + } + break; + case OpSelectionParser.PLUS: + this.enterOuterAlt(_localctx, 2); + { + this.state = 52; + this._errHandler.sync(this); + _alt = 1; + do { + switch (_alt) { + case 1: + { + { + this.state = 51; + this.match(OpSelectionParser.PLUS); + } + } + break; + default: + throw new NoViableAltException(this); + } + this.state = 54; + this._errHandler.sync(this); + _alt = 
this.interpreter.adaptivePredict(this._input, 4, this._ctx); + } while (_alt !== 2 && _alt !== ATN.INVALID_ALT_NUMBER); + } + break; + default: + throw new NoViableAltException(this); + } + } catch (re) { + if (re instanceof RecognitionException) { + _localctx.exception = re; + this._errHandler.reportError(this, re); + this._errHandler.recover(this, re); + } else { + throw re; + } + } finally { + this.exitRule(); + } + return _localctx; + } + // @RuleVersion(0) + public attributeExpr(): AttributeExprContext { + let _localctx: AttributeExprContext = new AttributeExprContext(this._ctx, this.state); + this.enterRule(_localctx, 8, OpSelectionParser.RULE_attributeExpr); + try { + this.state = 64; + this._errHandler.sync(this); + switch (this._input.LA(1)) { + case OpSelectionParser.NAME: + _localctx = new NameExprContext(_localctx); + this.enterOuterAlt(_localctx, 1); + { + this.state = 58; + this.match(OpSelectionParser.NAME); + this.state = 59; + this.match(OpSelectionParser.COLON); + this.state = 60; + this.value(); + } + break; + case OpSelectionParser.NAME_SUBSTRING: + _localctx = new NameSubstringExprContext(_localctx); + this.enterOuterAlt(_localctx, 2); + { + this.state = 61; + this.match(OpSelectionParser.NAME_SUBSTRING); + this.state = 62; + this.match(OpSelectionParser.COLON); + this.state = 63; + this.value(); + } + break; + default: + throw new NoViableAltException(this); + } + } catch (re) { + if (re instanceof RecognitionException) { + _localctx.exception = re; + this._errHandler.reportError(this, re); + this._errHandler.recover(this, re); + } else { + throw re; + } + } finally { + this.exitRule(); + } + return _localctx; + } + // @RuleVersion(0) + public value(): ValueContext { + const _localctx: ValueContext = new ValueContext(this._ctx, this.state); + this.enterRule(_localctx, 10, OpSelectionParser.RULE_value); + let _la: number; + try { + this.enterOuterAlt(_localctx, 1); + { + this.state = 66; + _la = this._input.LA(1); + if ( + !(_la === OpSelectionParser.QUOTED_STRING || _la === OpSelectionParser.UNQUOTED_STRING) + ) { + this._errHandler.recoverInline(this); + } else { + if (this._input.LA(1) === Token.EOF) { + this.matchedEOF = true; + } + + this._errHandler.reportMatch(this); + this.consume(); + } + } + } catch (re) { + if (re instanceof RecognitionException) { + _localctx.exception = re; + this._errHandler.reportError(this, re); + this._errHandler.recover(this, re); + } else { + throw re; + } + } finally { + this.exitRule(); + } + return _localctx; + } + + public sempred(_localctx: RuleContext, ruleIndex: number, predIndex: number): boolean { + switch (ruleIndex) { + case 1: + return this.expr_sempred(_localctx as ExprContext, predIndex); + } + return true; + } + private expr_sempred(_localctx: ExprContext, predIndex: number): boolean { + switch (predIndex) { + case 0: + return this.precpred(this._ctx, 3); + + case 1: + return this.precpred(this._ctx, 2); + } + return true; + } + + public static readonly _serializedATN: string = + '\x03\uC91D\uCABA\u058D\uAFBA\u4F53\u0607\uEA8B\uC241\x03\x0FG\x04\x02' + + '\t\x02\x04\x03\t\x03\x04\x04\t\x04\x04\x05\t\x05\x04\x06\t\x06\x04\x07' + + '\t\x07\x03\x02\x03\x02\x03\x02\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03' + + '\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03' + + '\x03\x03\x05\x03!\n\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03' + + '\x07\x03)\n\x03\f\x03\x0E\x03,\v\x03\x03\x04\x03\x04\x03\x04\x03\x04\x03' + + '\x04\x05\x043\n\x04\x03\x05\x03\x05\x06\x057\n\x05\r\x05\x0E\x058\x05' + + 
'\x05;\n\x05\x03\x06\x03\x06\x03\x06\x03\x06\x03\x06\x03\x06\x05\x06C\n' + + '\x06\x03\x07\x03\x07\x03\x07\x02\x02\x03\x04\b\x02\x02\x04\x02\x06\x02' + + '\b\x02\n\x02\f\x02\x02\x03\x03\x02\r\x0E\x02K\x02\x0E\x03\x02\x02\x02' + + '\x04 \x03\x02\x02\x02\x062\x03\x02\x02\x02\b:\x03\x02\x02\x02\nB\x03\x02' + + '\x02\x02\fD\x03\x02\x02\x02\x0E\x0F\x05\x04\x03\x02\x0F\x10\x07\x02\x02' + + '\x03\x10\x03\x03\x02\x02\x02\x11\x12\b\x03\x01\x02\x12!\x05\x06\x04\x02' + + '\x13\x14\x05\b\x05\x02\x14\x15\x05\x06\x04\x02\x15\x16\x05\b\x05\x02\x16' + + '!\x03\x02\x02\x02\x17\x18\x05\b\x05\x02\x18\x19\x05\x06\x04\x02\x19!\x03' + + '\x02\x02\x02\x1A\x1B\x05\x06\x04\x02\x1B\x1C\x05\b\x05\x02\x1C!\x03\x02' + + '\x02\x02\x1D\x1E\x07\x05\x02\x02\x1E!\x05\x04\x03\x06\x1F!\x07\x06\x02' + + '\x02 \x11\x03\x02\x02\x02 \x13\x03\x02\x02\x02 \x17\x03\x02\x02\x02 \x1A' + + '\x03\x02\x02\x02 \x1D\x03\x02\x02\x02 \x1F\x03\x02\x02\x02!*\x03\x02\x02' + + '\x02"#\f\x05\x02\x02#$\x07\x03\x02\x02$)\x05\x04\x03\x06%&\f\x04\x02' + + "\x02&'\x07\x04\x02\x02')\x05\x04\x03\x05(\"\x03\x02\x02\x02(%\x03\x02" + + '\x02\x02),\x03\x02\x02\x02*(\x03\x02\x02\x02*+\x03\x02\x02\x02+\x05\x03' + + '\x02\x02\x02,*\x03\x02\x02\x02-3\x05\n\x06\x02./\x07\t\x02\x02/0\x05\x04' + + '\x03\x0201\x07\n\x02\x0213\x03\x02\x02\x022-\x03\x02\x02\x022.\x03\x02' + + '\x02\x023\x07\x03\x02\x02\x024;\x07\x06\x02\x0257\x07\x07\x02\x0265\x03' + + '\x02\x02\x0278\x03\x02\x02\x0286\x03\x02\x02\x0289\x03\x02\x02\x029;\x03' + + '\x02\x02\x02:4\x03\x02\x02\x02:6\x03\x02\x02\x02;\t\x03\x02\x02\x02<=' + + '\x07\v\x02\x02=>\x07\b\x02\x02>C\x05\f\x07\x02?@\x07\f\x02\x02@A\x07\b' + + '\x02\x02AC\x05\f\x07\x02B<\x03\x02\x02\x02B?\x03\x02\x02\x02C\v\x03\x02' + + '\x02\x02DE\t\x02\x02\x02E\r\x03\x02\x02\x02\t (*28:B'; + public static __ATN: ATN; + public static get _ATN(): ATN { + if (!OpSelectionParser.__ATN) { + OpSelectionParser.__ATN = new ATNDeserializer().deserialize( + Utils.toCharArray(OpSelectionParser._serializedATN), + ); + } + + return OpSelectionParser.__ATN; + } +} + +export class StartContext extends ParserRuleContext { + public expr(): ExprContext { + return this.getRuleContext(0, ExprContext); + } + public EOF(): TerminalNode { + return this.getToken(OpSelectionParser.EOF, 0); + } + constructor(parent: ParserRuleContext | undefined, invokingState: number) { + super(parent, invokingState); + } + // @Override + public get ruleIndex(): number { + return OpSelectionParser.RULE_start; + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterStart) { + listener.enterStart(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitStart) { + listener.exitStart(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitStart) { + return visitor.visitStart(this); + } else { + return visitor.visitChildren(this); + } + } +} + +export class ExprContext extends ParserRuleContext { + constructor(parent: ParserRuleContext | undefined, invokingState: number) { + super(parent, invokingState); + } + // @Override + public get ruleIndex(): number { + return OpSelectionParser.RULE_expr; + } + public copyFrom(ctx: ExprContext): void { + super.copyFrom(ctx); + } +} +export class TraversalAllowedExpressionContext extends ExprContext { + public traversalAllowedExpr(): TraversalAllowedExprContext { + return this.getRuleContext(0, TraversalAllowedExprContext); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + 
this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterTraversalAllowedExpression) { + listener.enterTraversalAllowedExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitTraversalAllowedExpression) { + listener.exitTraversalAllowedExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitTraversalAllowedExpression) { + return visitor.visitTraversalAllowedExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class UpAndDownTraversalExpressionContext extends ExprContext { + public traversal(): TraversalContext[]; + public traversal(i: number): TraversalContext; + public traversal(i?: number): TraversalContext | TraversalContext[] { + if (i === undefined) { + return this.getRuleContexts(TraversalContext); + } else { + return this.getRuleContext(i, TraversalContext); + } + } + public traversalAllowedExpr(): TraversalAllowedExprContext { + return this.getRuleContext(0, TraversalAllowedExprContext); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterUpAndDownTraversalExpression) { + listener.enterUpAndDownTraversalExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitUpAndDownTraversalExpression) { + listener.exitUpAndDownTraversalExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitUpAndDownTraversalExpression) { + return visitor.visitUpAndDownTraversalExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class UpTraversalExpressionContext extends ExprContext { + public traversal(): TraversalContext { + return this.getRuleContext(0, TraversalContext); + } + public traversalAllowedExpr(): TraversalAllowedExprContext { + return this.getRuleContext(0, TraversalAllowedExprContext); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterUpTraversalExpression) { + listener.enterUpTraversalExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitUpTraversalExpression) { + listener.exitUpTraversalExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitUpTraversalExpression) { + return visitor.visitUpTraversalExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class DownTraversalExpressionContext extends ExprContext { + public traversalAllowedExpr(): TraversalAllowedExprContext { + return this.getRuleContext(0, TraversalAllowedExprContext); + } + public traversal(): TraversalContext { + return this.getRuleContext(0, TraversalContext); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterDownTraversalExpression) { + listener.enterDownTraversalExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitDownTraversalExpression) { + listener.exitDownTraversalExpression(this); + } + } + // @Override + public 
accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitDownTraversalExpression) { + return visitor.visitDownTraversalExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class NotExpressionContext extends ExprContext { + public NOT(): TerminalNode { + return this.getToken(OpSelectionParser.NOT, 0); + } + public expr(): ExprContext { + return this.getRuleContext(0, ExprContext); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterNotExpression) { + listener.enterNotExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitNotExpression) { + listener.exitNotExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitNotExpression) { + return visitor.visitNotExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class AndExpressionContext extends ExprContext { + public expr(): ExprContext[]; + public expr(i: number): ExprContext; + public expr(i?: number): ExprContext | ExprContext[] { + if (i === undefined) { + return this.getRuleContexts(ExprContext); + } else { + return this.getRuleContext(i, ExprContext); + } + } + public AND(): TerminalNode { + return this.getToken(OpSelectionParser.AND, 0); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterAndExpression) { + listener.enterAndExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitAndExpression) { + listener.exitAndExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitAndExpression) { + return visitor.visitAndExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class OrExpressionContext extends ExprContext { + public expr(): ExprContext[]; + public expr(i: number): ExprContext; + public expr(i?: number): ExprContext | ExprContext[] { + if (i === undefined) { + return this.getRuleContexts(ExprContext); + } else { + return this.getRuleContext(i, ExprContext); + } + } + public OR(): TerminalNode { + return this.getToken(OpSelectionParser.OR, 0); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterOrExpression) { + listener.enterOrExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitOrExpression) { + listener.exitOrExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitOrExpression) { + return visitor.visitOrExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class AllExpressionContext extends ExprContext { + public STAR(): TerminalNode { + return this.getToken(OpSelectionParser.STAR, 0); + } + constructor(ctx: ExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterAllExpression) { + listener.enterAllExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if 
(listener.exitAllExpression) { + listener.exitAllExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitAllExpression) { + return visitor.visitAllExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} + +export class TraversalAllowedExprContext extends ParserRuleContext { + constructor(parent: ParserRuleContext | undefined, invokingState: number) { + super(parent, invokingState); + } + // @Override + public get ruleIndex(): number { + return OpSelectionParser.RULE_traversalAllowedExpr; + } + public copyFrom(ctx: TraversalAllowedExprContext): void { + super.copyFrom(ctx); + } +} +export class AttributeExpressionContext extends TraversalAllowedExprContext { + public attributeExpr(): AttributeExprContext { + return this.getRuleContext(0, AttributeExprContext); + } + constructor(ctx: TraversalAllowedExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterAttributeExpression) { + listener.enterAttributeExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitAttributeExpression) { + listener.exitAttributeExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitAttributeExpression) { + return visitor.visitAttributeExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class ParenthesizedExpressionContext extends TraversalAllowedExprContext { + public LPAREN(): TerminalNode { + return this.getToken(OpSelectionParser.LPAREN, 0); + } + public expr(): ExprContext { + return this.getRuleContext(0, ExprContext); + } + public RPAREN(): TerminalNode { + return this.getToken(OpSelectionParser.RPAREN, 0); + } + constructor(ctx: TraversalAllowedExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterParenthesizedExpression) { + listener.enterParenthesizedExpression(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitParenthesizedExpression) { + listener.exitParenthesizedExpression(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitParenthesizedExpression) { + return visitor.visitParenthesizedExpression(this); + } else { + return visitor.visitChildren(this); + } + } +} + +export class TraversalContext extends ParserRuleContext { + public STAR(): TerminalNode | undefined { + return this.tryGetToken(OpSelectionParser.STAR, 0); + } + public PLUS(): TerminalNode[]; + public PLUS(i: number): TerminalNode; + public PLUS(i?: number): TerminalNode | TerminalNode[] { + if (i === undefined) { + return this.getTokens(OpSelectionParser.PLUS); + } else { + return this.getToken(OpSelectionParser.PLUS, i); + } + } + constructor(parent: ParserRuleContext | undefined, invokingState: number) { + super(parent, invokingState); + } + // @Override + public get ruleIndex(): number { + return OpSelectionParser.RULE_traversal; + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterTraversal) { + listener.enterTraversal(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitTraversal) { + listener.exitTraversal(this); + } + } + // @Override + public accept(visitor: 
OpSelectionVisitor): Result { + if (visitor.visitTraversal) { + return visitor.visitTraversal(this); + } else { + return visitor.visitChildren(this); + } + } +} + +export class AttributeExprContext extends ParserRuleContext { + constructor(parent: ParserRuleContext | undefined, invokingState: number) { + super(parent, invokingState); + } + // @Override + public get ruleIndex(): number { + return OpSelectionParser.RULE_attributeExpr; + } + public copyFrom(ctx: AttributeExprContext): void { + super.copyFrom(ctx); + } +} +export class NameExprContext extends AttributeExprContext { + public NAME(): TerminalNode { + return this.getToken(OpSelectionParser.NAME, 0); + } + public COLON(): TerminalNode { + return this.getToken(OpSelectionParser.COLON, 0); + } + public value(): ValueContext { + return this.getRuleContext(0, ValueContext); + } + constructor(ctx: AttributeExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterNameExpr) { + listener.enterNameExpr(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitNameExpr) { + listener.exitNameExpr(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitNameExpr) { + return visitor.visitNameExpr(this); + } else { + return visitor.visitChildren(this); + } + } +} +export class NameSubstringExprContext extends AttributeExprContext { + public NAME_SUBSTRING(): TerminalNode { + return this.getToken(OpSelectionParser.NAME_SUBSTRING, 0); + } + public COLON(): TerminalNode { + return this.getToken(OpSelectionParser.COLON, 0); + } + public value(): ValueContext { + return this.getRuleContext(0, ValueContext); + } + constructor(ctx: AttributeExprContext) { + super(ctx.parent, ctx.invokingState); + this.copyFrom(ctx); + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterNameSubstringExpr) { + listener.enterNameSubstringExpr(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitNameSubstringExpr) { + listener.exitNameSubstringExpr(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitNameSubstringExpr) { + return visitor.visitNameSubstringExpr(this); + } else { + return visitor.visitChildren(this); + } + } +} + +export class ValueContext extends ParserRuleContext { + public QUOTED_STRING(): TerminalNode | undefined { + return this.tryGetToken(OpSelectionParser.QUOTED_STRING, 0); + } + public UNQUOTED_STRING(): TerminalNode | undefined { + return this.tryGetToken(OpSelectionParser.UNQUOTED_STRING, 0); + } + constructor(parent: ParserRuleContext | undefined, invokingState: number) { + super(parent, invokingState); + } + // @Override + public get ruleIndex(): number { + return OpSelectionParser.RULE_value; + } + // @Override + public enterRule(listener: OpSelectionListener): void { + if (listener.enterValue) { + listener.enterValue(this); + } + } + // @Override + public exitRule(listener: OpSelectionListener): void { + if (listener.exitValue) { + listener.exitValue(this); + } + } + // @Override + public accept(visitor: OpSelectionVisitor): Result { + if (visitor.visitValue) { + return visitor.visitValue(this); + } else { + return visitor.visitChildren(this); + } + } +} diff --git a/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionVisitor.ts 
b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionVisitor.ts new file mode 100644 index 0000000000000..19e9f3b480257 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/op-selection/generated/OpSelectionVisitor.ts @@ -0,0 +1,171 @@ +// Generated from /Users/briantu/repos/dagster/js_modules/dagster-ui/packages/ui-core/src/op-selection/OpSelection.g4 by ANTLR 4.9.0-SNAPSHOT + +import {ParseTreeVisitor} from 'antlr4ts/tree/ParseTreeVisitor'; + +import { + AllExpressionContext, + AndExpressionContext, + AttributeExprContext, + AttributeExpressionContext, + DownTraversalExpressionContext, + ExprContext, + NameExprContext, + NameSubstringExprContext, + NotExpressionContext, + OrExpressionContext, + ParenthesizedExpressionContext, + StartContext, + TraversalAllowedExprContext, + TraversalAllowedExpressionContext, + TraversalContext, + UpAndDownTraversalExpressionContext, + UpTraversalExpressionContext, + ValueContext, +} from './OpSelectionParser'; + +/** + * This interface defines a complete generic visitor for a parse tree produced + * by `OpSelectionParser`. + * + * @param <Result> The return type of the visit operation. Use `void` for + * operations with no return type. + */ +export interface OpSelectionVisitor<Result> extends ParseTreeVisitor<Result> { + /** + * Visit a parse tree produced by the `TraversalAllowedExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitTraversalAllowedExpression?: (ctx: TraversalAllowedExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `UpAndDownTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitUpAndDownTraversalExpression?: (ctx: UpAndDownTraversalExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `UpTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitUpTraversalExpression?: (ctx: UpTraversalExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `DownTraversalExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitDownTraversalExpression?: (ctx: DownTraversalExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `NotExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitNotExpression?: (ctx: NotExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `AndExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitAndExpression?: (ctx: AndExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `OrExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitOrExpression?: (ctx: OrExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `AllExpression` + * labeled alternative in `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitAllExpression?: (ctx: AllExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `NameExpr` + * labeled alternative in `OpSelectionParser.attributeExpr`.
+ * @param ctx the parse tree + * @return the visitor result + */ + visitNameExpr?: (ctx: NameExprContext) => Result; + + /** + * Visit a parse tree produced by the `NameSubstringExpr` + * labeled alternative in `OpSelectionParser.attributeExpr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitNameSubstringExpr?: (ctx: NameSubstringExprContext) => Result; + + /** + * Visit a parse tree produced by the `AttributeExpression` + * labeled alternative in `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitAttributeExpression?: (ctx: AttributeExpressionContext) => Result; + + /** + * Visit a parse tree produced by the `ParenthesizedExpression` + * labeled alternative in `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitParenthesizedExpression?: (ctx: ParenthesizedExpressionContext) => Result; + + /** + * Visit a parse tree produced by `OpSelectionParser.start`. + * @param ctx the parse tree + * @return the visitor result + */ + visitStart?: (ctx: StartContext) => Result; + + /** + * Visit a parse tree produced by `OpSelectionParser.expr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitExpr?: (ctx: ExprContext) => Result; + + /** + * Visit a parse tree produced by `OpSelectionParser.traversalAllowedExpr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitTraversalAllowedExpr?: (ctx: TraversalAllowedExprContext) => Result; + + /** + * Visit a parse tree produced by `OpSelectionParser.traversal`. + * @param ctx the parse tree + * @return the visitor result + */ + visitTraversal?: (ctx: TraversalContext) => Result; + + /** + * Visit a parse tree produced by `OpSelectionParser.attributeExpr`. + * @param ctx the parse tree + * @return the visitor result + */ + visitAttributeExpr?: (ctx: AttributeExprContext) => Result; + + /** + * Visit a parse tree produced by `OpSelectionParser.value`. + * @param ctx the parse tree + * @return the visitor result + */ + visitValue?: (ctx: ValueContext) => Result; +} diff --git a/js_modules/dagster-ui/packages/ui-core/src/pipelines/Description.tsx b/js_modules/dagster-ui/packages/ui-core/src/pipelines/Description.tsx index d44e41b7178ed..ba9834a38808c 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/pipelines/Description.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/pipelines/Description.tsx @@ -91,7 +91,7 @@ export const Description = ({maxHeight, description, fontSize}: IDescriptionProp )}
- {removeLeadingSpaces(description.replace('```', '```sql'))} + {removeLeadingSpaces(description)}
); diff --git a/js_modules/dagster-ui/packages/ui-core/src/runs/AssetTagCollections.tsx b/js_modules/dagster-ui/packages/ui-core/src/runs/AssetTagCollections.tsx index ca9220bf5177f..803ee593749e0 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/runs/AssetTagCollections.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/runs/AssetTagCollections.tsx @@ -24,6 +24,14 @@ import {TagActionsPopover} from '../ui/TagActions'; import {VirtualizedItemListForDialog} from '../ui/VirtualizedItemListForDialog'; import {numberFormatter} from '../ui/formatters'; +const sortItemAssetKey = (a: AssetKey, b: AssetKey) => { + return displayNameForAssetKey(a).localeCompare(displayNameForAssetKey(b)); +}; + +const sortItemAssetCheck = (a: Check, b: Check) => { + return labelForAssetCheck(a).localeCompare(labelForAssetCheck(b)); +}; + const renderItemAssetKey = (assetKey: AssetKey) => ( @@ -149,10 +157,20 @@ export function useAdjustChildVisibilityToFill(moreLabelFn: (count: number) => s export const AssetKeyTagCollection = React.memo((props: AssetKeyTagCollectionProps) => { const {assetKeys, useTags, maxRows, dialogTitle = 'Assets in run'} = props; - const {setShowMore, dialog} = useShowMoreDialog(dialogTitle, assetKeys, renderItemAssetKey); const count = assetKeys?.length ?? 0; const rendered = maxRows ? 10 : count === 1 ? 1 : 0; + + const {sortedAssetKeys, slicedSortedAssetKeys} = React.useMemo(() => { + const sortedAssetKeys = assetKeys?.slice().sort(sortItemAssetKey) ?? []; + return { + sortedAssetKeys, + slicedSortedAssetKeys: sortedAssetKeys?.slice(0, rendered) ?? [], + }; + }, [assetKeys, rendered]); + + const {setShowMore, dialog} = useShowMoreDialog(dialogTitle, sortedAssetKeys, renderItemAssetKey); + const moreLabelFn = React.useCallback( (displayed: number) => displayed === 0 @@ -165,7 +183,7 @@ export const AssetKeyTagCollection = React.memo((props: AssetKeyTagCollectionPro const {containerRef, moreLabelRef} = useAdjustChildVisibilityToFill(moreLabelFn); - if (!assetKeys || !assetKeys.length) { + if (!count) { return null; } @@ -180,7 +198,7 @@ export const AssetKeyTagCollection = React.memo((props: AssetKeyTagCollectionPro overflow: 'hidden', }} > - {assetKeys.slice(0, rendered).map((assetKey) => ( + {slicedSortedAssetKeys.map((assetKey) => ( // Outer span ensures the popover target is in the right place if the // parent is a flexbox. @@ -263,10 +281,24 @@ interface AssetCheckTagCollectionProps { export const AssetCheckTagCollection = React.memo((props: AssetCheckTagCollectionProps) => { const {assetChecks, maxRows, useTags, dialogTitle = 'Asset checks in run'} = props; - const {setShowMore, dialog} = useShowMoreDialog(dialogTitle, assetChecks, renderItemAssetCheck); const count = assetChecks?.length ?? 0; const rendered = maxRows ? 10 : count === 1 ? 1 : 0; + + const {sortedAssetChecks, slicedSortedAssetChecks} = React.useMemo(() => { + const sortedAssetChecks = assetChecks?.slice().sort(sortItemAssetCheck) ?? []; + return { + sortedAssetChecks, + slicedSortedAssetChecks: sortedAssetChecks?.slice(0, rendered) ?? 
[], + }; + }, [assetChecks, rendered]); + + const {setShowMore, dialog} = useShowMoreDialog( + dialogTitle, + sortedAssetChecks, + renderItemAssetCheck, + ); + const moreLabelFn = React.useCallback( (displayed: number) => displayed === 0 @@ -279,7 +311,7 @@ export const AssetCheckTagCollection = React.memo((props: AssetCheckTagCollectio const {containerRef, moreLabelRef} = useAdjustChildVisibilityToFill(moreLabelFn); - if (!assetChecks || !assetChecks.length) { + if (!count) { return null; } @@ -294,7 +326,7 @@ export const AssetCheckTagCollection = React.memo((props: AssetCheckTagCollectio overflow: 'hidden', }} > - {assetChecks.slice(0, rendered).map((check) => ( + {slicedSortedAssetChecks.map((check) => ( { ...node, clientsideKey: `csk${node.timestamp}-${ii}`, })); - const nodes = [...state.nodes, ...queuedNodes]; + + const copy = state.nodeChunks.slice(); + copy.push(queuedNodes); + const counts = {...state.counts}; queuedNodes.forEach((node) => { const level = logNodeLevel(node); counts[level]++; }); - return {nodes, counts, loading: action.hasMore, cursor: action.cursor}; + + return {nodeChunks: copy, counts, loading: action.hasMore, cursor: action.cursor}; } case 'set-cursor': return {...state, cursor: action.cursor}; case 'reset': - return {nodes: [], counts: emptyCounts, cursor: null, loading: true}; + return {nodeChunks: [], counts: emptyCounts, cursor: null, loading: true}; default: return state; } }; const initialState: State = { - nodes: [], + nodeChunks: [] as LogNode[][], counts: emptyCounts, cursor: null, loading: true, @@ -184,7 +188,7 @@ const useLogsProviderWithSubscription = (runId: string) => { }, BATCH_INTERVAL); }, [syncPipelineStatusToApolloCache]); - const {nodes, counts, cursor, loading} = state; + const {nodeChunks, counts, cursor, loading} = state; const {availability, disabled, status} = React.useContext(WebSocketContext); const lostWebsocket = !disabled && availability === 'available' && status === WebSocket.CLOSED; @@ -227,10 +231,10 @@ const useLogsProviderWithSubscription = (runId: string) => { return React.useMemo( () => - nodes !== null - ? {allNodes: nodes, counts, loading, subscriptionComponent} - : {allNodes: [], counts, loading, subscriptionComponent}, - [counts, loading, nodes, subscriptionComponent], + nodeChunks !== null + ? {allNodeChunks: nodeChunks, counts, loading, subscriptionComponent} + : {allNodeChunks: [], counts, loading, subscriptionComponent}, + [counts, loading, nodeChunks, subscriptionComponent], ); }; @@ -285,7 +289,7 @@ const POLL_INTERVAL = 5000; const LogsProviderWithQuery = (props: LogsProviderWithQueryProps) => { const {children, runId} = props; const [state, dispatch] = React.useReducer(reducer, initialState); - const {counts, cursor, nodes} = state; + const {counts, cursor, nodeChunks} = state; const dependency = useTraceDependency('RunLogsQuery'); @@ -332,9 +336,9 @@ const LogsProviderWithQuery = (props: LogsProviderWithQueryProps) => { return ( <> {children( - nodes !== null && nodes.length > 0 - ? {allNodes: nodes, counts, loading: false} - : {allNodes: [], counts, loading: true}, + nodeChunks !== null && nodeChunks.length > 0 + ? 
{allNodeChunks: nodeChunks, counts, loading: false} + : {allNodeChunks: [], counts, loading: true}, )} ); @@ -350,7 +354,7 @@ export const LogsProvider = (props: LogsProviderProps) => { } if (availability === 'attempting-to-connect') { - return <>{children({allNodes: [], counts: emptyCounts, loading: true})}; + return <>{children({allNodeChunks: [], counts: emptyCounts, loading: true})}; } return {children}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/runs/LogsScrollingTable.tsx b/js_modules/dagster-ui/packages/ui-core/src/runs/LogsScrollingTable.tsx index 35ff1f61b5c7a..a89eac0f5b8c6 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/runs/LogsScrollingTable.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/runs/LogsScrollingTable.tsx @@ -1,4 +1,4 @@ -import {Box, Colors, NonIdealState, Row} from '@dagster-io/ui-components'; +import {Box, Colors, NonIdealState, Row, SpinnerWithText} from '@dagster-io/ui-components'; import {useVirtualizer} from '@tanstack/react-virtual'; import {useEffect, useRef} from 'react'; import styled from 'styled-components'; @@ -81,12 +81,10 @@ export const LogsScrollingTable = (props: Props) => { }, [totalHeight, virtualizer]); const content = () => { - if (logs.loading) { + if (logs.allNodeChunks.length === 0 && logs.loading) { return ( - - - - + + ); } diff --git a/js_modules/dagster-ui/packages/ui-core/src/runs/RunMetadataProvider.tsx b/js_modules/dagster-ui/packages/ui-core/src/runs/RunMetadataProvider.tsx index b6f2f269f0068..c99bfc4106a6f 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/runs/RunMetadataProvider.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/runs/RunMetadataProvider.tsx @@ -3,6 +3,7 @@ import * as React from 'react'; import {LogsProviderLogs} from './LogsProvider'; import {RunContext} from './RunContext'; import {gql} from '../apollo-client'; +import {flattenOneLevel} from '../util/flattenOneLevel'; import {RunFragment} from './types/RunFragments.types'; import {RunMetadataProviderMessageFragment} from './types/RunMetadataProvider.types'; import {StepEventStatus} from '../graphql/types'; @@ -371,7 +372,8 @@ export const RunMetadataProvider = ({logs, children}: IRunMetadataProviderProps) const run = React.useContext(RunContext); const runMetadata = React.useMemo(() => extractMetadataFromRun(run), [run]); const metadata = React.useMemo( - () => (logs.loading ? runMetadata : extractMetadataFromLogs(logs.allNodes)), + () => + logs.loading ? 
runMetadata : extractMetadataFromLogs(flattenOneLevel(logs.allNodeChunks)), [logs, runMetadata], ); return <>{children(metadata)}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/runs/StepLogsDialog.tsx b/js_modules/dagster-ui/packages/ui-core/src/runs/StepLogsDialog.tsx index b8506ab6a6f2c..4f107eb164302 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/runs/StepLogsDialog.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/runs/StepLogsDialog.tsx @@ -8,7 +8,7 @@ import { Mono, Spinner, } from '@dagster-io/ui-components'; -import {useState} from 'react'; +import {useMemo, useState} from 'react'; import * as React from 'react'; import {Link} from 'react-router-dom'; import styled from 'styled-components'; @@ -22,6 +22,7 @@ import {IRunMetadataDict, RunMetadataProvider} from './RunMetadataProvider'; import {titleForRun} from './RunUtils'; import {useComputeLogFileKeyForSelection} from './useComputeLogFileKeyForSelection'; import {DagsterEventType} from '../graphql/types'; +import {flattenOneLevel} from '../util/flattenOneLevel'; export function useStepLogs({runId, stepKeys}: {runId?: string; stepKeys?: string[]}) { const [showingLogs, setShowingLogs] = React.useState<{runId: string; stepKeys: string[]} | null>( @@ -113,9 +114,12 @@ export const StepLogsModalContent = ({ const [logType, setComputeLogType] = useState(LogType.structured); const [computeLogUrl, setComputeLogUrl] = React.useState(null); - const firstLogForStep = logs.allNodes.find( + const flatLogs = useMemo(() => flattenOneLevel(logs.allNodeChunks), [logs]); + + const firstLogForStep = flatLogs.find( (l) => l.eventType === DagsterEventType.STEP_START && l.stepKey && stepKeys.includes(l.stepKey), ); + const firstLogForStepTime = firstLogForStep ? Number(firstLogForStep.timestamp) : 0; const [filter, setFilter] = useState({ diff --git a/js_modules/dagster-ui/packages/ui-core/src/runs/filterLogs.tsx b/js_modules/dagster-ui/packages/ui-core/src/runs/filterLogs.tsx index 368fadca1b641..0882ed53b3181 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/runs/filterLogs.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/runs/filterLogs.tsx @@ -3,9 +3,10 @@ import {eventTypeToDisplayType} from './getRunFilterProviders'; import {logNodeLevel} from './logNodeLevel'; import {LogNode} from './types'; import {weakmapMemoize} from '../app/Util'; +import {flattenOneLevel} from '../util/flattenOneLevel'; export function filterLogs(logs: LogsProviderLogs, filter: LogFilter, filterStepKeys: string[]) { - const filteredNodes = logs.allNodes.filter((node) => { + const filteredNodes = flattenOneLevel(logs.allNodeChunks).filter((node) => { // These events are used to determine which assets a run will materialize and are not intended // to be displayed in the Dagster UI. Pagination is offset based, so we remove these logs client-side. 
if ( diff --git a/js_modules/dagster-ui/packages/ui-core/src/scripts/generateOpSelection.ts b/js_modules/dagster-ui/packages/ui-core/src/scripts/generateOpSelection.ts new file mode 100644 index 0000000000000..ff79e194f3a8d --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/scripts/generateOpSelection.ts @@ -0,0 +1,16 @@ +import {execSync} from 'child_process'; +import path from 'path'; + +const OP_SELECTION_GRAMMAR_FILE_PATH = path.resolve('./src/op-selection/OpSelection.g4'); +execSync(`antlr4ts -visitor -o ./src/op-selection/generated ${OP_SELECTION_GRAMMAR_FILE_PATH}`); + +const files = [ + 'OpSelectionLexer.ts', + 'OpSelectionListener.ts', + 'OpSelectionParser.ts', + 'OpSelectionVisitor.ts', +]; + +files.forEach((file) => { + execSync(`yarn prettier ./src/op-selection/generated/${file} --write`); +}); diff --git a/js_modules/dagster-ui/packages/ui-core/src/selection/SelectionAutoComplete.ts b/js_modules/dagster-ui/packages/ui-core/src/selection/SelectionAutoComplete.ts index 8603de6447fe8..8973cd0835cdd 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/selection/SelectionAutoComplete.ts +++ b/js_modules/dagster-ui/packages/ui-core/src/selection/SelectionAutoComplete.ts @@ -38,7 +38,7 @@ type TextCallback = (value: string) => string; const DEFAULT_TEXT_CALLBACK = (value: string) => value; // set to true for useful debug output. -const DEBUG = true; +const DEBUG = false; export class SelectionAutoCompleteVisitor extends AbstractParseTreeVisitor @@ -507,11 +507,15 @@ export class SelectionAutoCompleteVisitor } } -export function createSelectionHint, N extends keyof T>( - _nameBase: N, - attributesMap: T, - functions: string[], -): CodeMirror.HintFunction { +export function createSelectionHint, N extends keyof T>({ + nameBase: _nameBase, + attributesMap, + functions, +}: { + nameBase: N; + attributesMap: T; + functions: string[]; +}): CodeMirror.HintFunction { const nameBase = _nameBase as string; return function (cm: CodeMirror.Editor, _options: CodeMirror.ShowHintOptions): any { diff --git a/js_modules/dagster-ui/packages/ui-core/src/selection/SelectionAutoCompleteInput.tsx b/js_modules/dagster-ui/packages/ui-core/src/selection/SelectionAutoCompleteInput.tsx new file mode 100644 index 0000000000000..e84b930cf248d --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/selection/SelectionAutoCompleteInput.tsx @@ -0,0 +1,212 @@ +import {Colors, Icon} from '@dagster-io/ui-components'; +import CodeMirror, {Editor, HintFunction} from 'codemirror'; +import {Linter} from 'codemirror/addon/lint/lint'; +import {useLayoutEffect, useMemo, useRef} from 'react'; +import styled, {createGlobalStyle, css} from 'styled-components'; + +import { + SelectionAutoCompleteInputCSS, + applyStaticSyntaxHighlighting, +} from './SelectionAutoCompleteHighlighter'; +import {useUpdatingRef} from '../hooks/useUpdatingRef'; +import {createSelectionHint} from '../selection/SelectionAutoComplete'; + +import 'codemirror/addon/edit/closebrackets'; +import 'codemirror/lib/codemirror.css'; +import 'codemirror/addon/hint/show-hint'; +import 'codemirror/addon/hint/show-hint.css'; +import 'codemirror/addon/lint/lint.css'; +import 'codemirror/addon/display/placeholder'; + +type SelectionAutoCompleteInputProps, N extends keyof T> = { + nameBase: N; + attributesMap: T; + placeholder: string; + functions: string[]; + linter: Linter; + value: string; + onChange: (value: string) => void; +}; + +export const SelectionAutoCompleteInput = , N extends keyof T>({ + value, + nameBase, + placeholder, + onChange, + 
functions, + linter, + attributesMap, +}: SelectionAutoCompleteInputProps) => { + const editorRef = useRef(null); + const cmInstance = useRef(null); + + const currentValueRef = useUpdatingRef(value); + const currentPendingValueRef = useRef(value); + const setValueTimeoutRef = useRef>(null); + + const hintRef = useUpdatingRef( + useMemo(() => { + return createSelectionHint({nameBase, attributesMap, functions}); + }, [nameBase, attributesMap, functions]), + ); + + useLayoutEffect(() => { + if (editorRef.current && !cmInstance.current) { + cmInstance.current = CodeMirror(editorRef.current, { + value, + mode: 'assetSelection', + lineNumbers: false, + lineWrapping: false, + scrollbarStyle: 'native', + autoCloseBrackets: true, + lint: { + getAnnotations: linter, + async: false, + }, + placeholder, + extraKeys: { + 'Ctrl-Space': 'autocomplete', + Tab: (cm: Editor) => { + cm.replaceSelection(' ', 'end'); + }, + }, + }); + + cmInstance.current.setSize('100%', 20); + + // Enforce single line by preventing newlines + cmInstance.current.on('beforeChange', (_instance: Editor, change) => { + if (change.text.some((line) => line.includes('\n'))) { + change.cancel(); + } + }); + + cmInstance.current.on('change', (instance: Editor, change) => { + const newValue = instance.getValue().replace(/\s+/g, ' '); + currentPendingValueRef.current = newValue; + if (setValueTimeoutRef.current) { + clearTimeout(setValueTimeoutRef.current); + } + setValueTimeoutRef.current = setTimeout(() => { + onChange(newValue); + }, 2000); + + if (change.origin === 'complete' && change.text[0]?.endsWith('()')) { + // Set cursor inside the right parenthesis + const cursor = instance.getCursor(); + instance.setCursor({...cursor, ch: cursor.ch - 1}); + } + }); + + cmInstance.current.on('inputRead', (instance: Editor) => { + showHint(instance, hintRef.current); + }); + + cmInstance.current.on('cursorActivity', (instance: Editor) => { + applyStaticSyntaxHighlighting(instance); + showHint(instance, hintRef.current); + }); + + cmInstance.current.on('blur', () => { + if (currentPendingValueRef.current !== currentValueRef.current) { + onChange(currentPendingValueRef.current); + } + }); + + requestAnimationFrame(() => { + if (!cmInstance.current) { + return; + } + + applyStaticSyntaxHighlighting(cmInstance.current); + }); + } + + return () => { + const cm = cmInstance.current; + if (cm) { + // Clean up the instance... + cm.closeHint(); + cm.getWrapperElement()?.parentNode?.removeChild(cm.getWrapperElement()); + } + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + // Update CodeMirror when value prop changes + useLayoutEffect(() => { + const noNewLineValue = value.replace('\n', ' '); + if (cmInstance.current && cmInstance.current.getValue() !== noNewLineValue) { + const instance = cmInstance.current; + const cursor = instance.getCursor(); + instance.setValue(noNewLineValue); + instance.setCursor(cursor); + showHint(instance, hintRef.current); + } + }, [hintRef, value]); + + return ( + <> + + + +
+ + + ); +}; + +export const iconStyle = (img: string) => css` + &:before { + content: ' '; + width: 14px; + mask-size: contain; + mask-repeat: no-repeat; + mask-position: center; + mask-image: url(${img}); + background: ${Colors.accentPrimary()}; + display: inline-block; + } +`; + +export const InputDiv = styled.div` + ${SelectionAutoCompleteInputCSS} +`; + +const GlobalHintStyles = createGlobalStyle` + .CodeMirror-hints { + background: ${Colors.popoverBackground()}; + border: none; + border-radius: 4px; + padding: 8px 4px; + .CodeMirror-hint { + border-radius: 4px; + font-size: 14px; + padding: 6px 8px 6px 12px; + color: ${Colors.textDefault()}; + &.CodeMirror-hint-active { + background-color: ${Colors.backgroundBlue()}; + color: ${Colors.textDefault()}; + } + } + } +`; + +function showHint(instance: Editor, hint: HintFunction) { + requestAnimationFrame(() => { + requestAnimationFrame(() => { + instance.showHint({ + hint, + completeSingle: false, + moveOnOverlap: true, + updateOnCursorActivity: true, + }); + }); + }); +} diff --git a/js_modules/dagster-ui/packages/ui-core/src/selection/__tests__/SelectionAutoComplete.test.ts b/js_modules/dagster-ui/packages/ui-core/src/selection/__tests__/SelectionAutoComplete.test.ts index 9e63b325f6d1a..88322779bd09a 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/selection/__tests__/SelectionAutoComplete.test.ts +++ b/js_modules/dagster-ui/packages/ui-core/src/selection/__tests__/SelectionAutoComplete.test.ts @@ -3,9 +3,9 @@ import {Hint, Hints, Position} from 'codemirror'; import {createSelectionHint} from '../SelectionAutoComplete'; describe('createAssetSelectionHint', () => { - const selectionHint = createSelectionHint( - 'key', - { + const selectionHint = createSelectionHint({ + nameBase: 'key', + attributesMap: { key: ['asset1', 'asset2', 'asset3'], tag: ['tag1', 'tag2', 'tag3'], owner: ['marco@dagsterlabs.com', 'team:frontend'], @@ -13,8 +13,8 @@ describe('createAssetSelectionHint', () => { kind: ['kind1', 'kind2'], code_location: ['repo1@location1', 'repo2@location2'], }, - ['sinks', 'roots'], - ); + functions: ['sinks', 'roots'], + }); type HintsModified = Omit & { list: Array; @@ -818,4 +818,65 @@ describe('createAssetSelectionHint', () => { to: 60, }); }); + + it('handles complex ands/ors', () => { + expect(testAutocomplete('key:"value"* or tag:"value"+ and owner:"owner" and |')).toEqual({ + from: 51, + list: [ + { + displayText: 'key_substring:', + text: 'key_substring:', + }, + { + displayText: 'key:', + text: 'key:', + }, + { + displayText: 'tag:', + text: 'tag:', + }, + { + displayText: 'owner:', + text: 'owner:', + }, + { + displayText: 'group:', + text: 'group:', + }, + { + displayText: 'kind:', + text: 'kind:', + }, + { + displayText: 'code_location:', + text: 'code_location:', + }, + { + displayText: 'sinks()', + text: 'sinks()', + }, + { + displayText: 'roots()', + text: 'roots()', + }, + { + displayText: 'not', + text: 'not ', + }, + { + displayText: '*', + text: '*', + }, + { + displayText: '+', + text: '+', + }, + { + displayText: '(', + text: '()', + }, + ], + to: 51, + }); + }); }); diff --git a/js_modules/dagster-ui/packages/ui-core/src/selection/createSelectionLinter.ts b/js_modules/dagster-ui/packages/ui-core/src/selection/createSelectionLinter.ts new file mode 100644 index 0000000000000..ac10d4c98f504 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/selection/createSelectionLinter.ts @@ -0,0 +1,46 @@ +import {CharStreams, CommonTokenStream, Lexer, Parser, ParserRuleContext} from 'antlr4ts'; +import 
CodeMirror from 'codemirror'; +import {Linter} from 'codemirror/addon/lint/lint'; + +import {CustomErrorListener} from './CustomErrorListener'; + +type LexerConstructor = new (...args: any[]) => Lexer; +type ParserConstructor = new (...args: any[]) => Parser & { + start: () => ParserRuleContext; +}; + +export function createSelectionLinter({ + Lexer: LexerKlass, + Parser: ParserKlass, +}: { + Lexer: LexerConstructor; + Parser: ParserConstructor; +}): Linter { + return (text: string) => { + const errorListener = new CustomErrorListener(); + + const inputStream = CharStreams.fromString(text); + const lexer = new LexerKlass(inputStream); + + lexer.removeErrorListeners(); + lexer.addErrorListener(errorListener); + + const tokens = new CommonTokenStream(lexer); + const parser = new ParserKlass(tokens); + + parser.removeErrorListeners(); // Remove default console error listener + parser.addErrorListener(errorListener); + + parser.start(); + + // Map syntax errors to CodeMirror's lint format + const lintErrors = errorListener.getErrors().map((error) => ({ + message: error.message.replace(', ', ''), + severity: 'error', + from: CodeMirror.Pos(error.line, error.column), + to: CodeMirror.Pos(error.line, text.length), + })); + + return lintErrors; + }; +} diff --git a/js_modules/dagster-ui/packages/ui-core/src/ticks/EvaluateScheduleDialog.tsx b/js_modules/dagster-ui/packages/ui-core/src/ticks/EvaluateScheduleDialog.tsx index 06add60352b2c..7262979f9473a 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/ticks/EvaluateScheduleDialog.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/ticks/EvaluateScheduleDialog.tsx @@ -34,6 +34,7 @@ import {showCustomAlert} from '../app/CustomAlertProvider'; import {PYTHON_ERROR_FRAGMENT} from '../app/PythonErrorFragment'; import {PythonErrorInfo} from '../app/PythonErrorInfo'; import {assertUnreachable} from '../app/Util'; +import {useTrackEvent} from '../app/analytics'; import {TimeContext} from '../app/time/TimeContext'; import {timestampToString} from '../app/time/timestampToString'; import {PythonErrorFragment} from '../app/types/PythonErrorFragment.types'; @@ -77,6 +78,8 @@ export const EvaluateScheduleDialog = (props: Props) => { }; const EvaluateSchedule = ({repoAddress, name, onClose, jobName}: Props) => { + const trackEvent = useTrackEvent(); + const [selectedTimestamp, setSelectedTimestamp] = useState<{ts: number; label: string}>(); const scheduleSelector: ScheduleSelector = useMemo( () => ({ @@ -181,6 +184,8 @@ const EvaluateSchedule = ({repoAddress, name, onClose, jobName}: Props) => { if (!canLaunchAll) { return; } + + trackEvent('launch-all-schedule'); setLaunching(true); try { @@ -193,7 +198,7 @@ const EvaluateSchedule = ({repoAddress, name, onClose, jobName}: Props) => { setLaunching(false); onClose(); - }, [canLaunchAll, executionParamsList, launchMultipleRunsWithTelemetry, onClose]); + }, [canLaunchAll, executionParamsList, launchMultipleRunsWithTelemetry, onClose, trackEvent]); const content = useMemo(() => { // launching all runs state diff --git a/js_modules/dagster-ui/packages/ui-core/src/ticks/SensorDryRunDialog.tsx b/js_modules/dagster-ui/packages/ui-core/src/ticks/SensorDryRunDialog.tsx index 1cd0bda5aad43..1047d13d9a391 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/ticks/SensorDryRunDialog.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/ticks/SensorDryRunDialog.tsx @@ -32,6 +32,7 @@ import {showSharedToaster} from '../app/DomUtils'; import {PYTHON_ERROR_FRAGMENT} from '../app/PythonErrorFragment'; import 
{PythonErrorInfo} from '../app/PythonErrorInfo'; import {assertUnreachable} from '../app/Util'; +import {useTrackEvent} from '../app/analytics'; import {PythonErrorFragment} from '../app/types/PythonErrorFragment.types'; import {SensorSelector} from '../graphql/types'; import {useLaunchMultipleRunsWithTelemetry} from '../launchpad/useLaunchMultipleRunsWithTelemetry'; @@ -74,6 +75,8 @@ export const SensorDryRunDialog = (props: Props) => { }; const SensorDryRun = ({repoAddress, name, currentCursor, onClose, jobName}: Props) => { + const trackEvent = useTrackEvent(); + const [sensorDryRun] = useMutation( EVALUATE_SENSOR_MUTATION, ); @@ -187,6 +190,8 @@ const SensorDryRun = ({repoAddress, name, currentCursor, onClose, jobName}: Prop if (!canLaunchAll) { return; } + + trackEvent('launch-all-sensor'); setLaunching(true); try { @@ -206,6 +211,7 @@ const SensorDryRun = ({repoAddress, name, currentCursor, onClose, jobName}: Prop launchMultipleRunsWithTelemetry, onClose, onCommitTickResult, + trackEvent, ]); const leftButtons = useMemo(() => { diff --git a/js_modules/dagster-ui/packages/ui-core/src/ticks/__tests__/EvaluateScheduleDialog.test.tsx b/js_modules/dagster-ui/packages/ui-core/src/ticks/__tests__/EvaluateScheduleDialog.test.tsx index 6c17208d0628d..a8a2b55c06eb5 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/ticks/__tests__/EvaluateScheduleDialog.test.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/ticks/__tests__/EvaluateScheduleDialog.test.tsx @@ -4,6 +4,7 @@ import userEvent from '@testing-library/user-event'; import {MemoryRouter, useHistory} from 'react-router-dom'; import {Resolvers} from '../../apollo-client'; +import {useTrackEvent} from '../../app/analytics'; import {EvaluateScheduleDialog} from '../EvaluateScheduleDialog'; import { GetScheduleQueryMock, @@ -26,6 +27,11 @@ jest.mock('react-router-dom', () => ({ useHistory: jest.fn(), })); +// Mocking useTrackEvent +jest.mock('../../app/analytics', () => ({ + useTrackEvent: jest.fn(() => jest.fn()), +})); + const onCloseMock = jest.fn(); function Test({mocks, resolvers}: {mocks?: MockedResponse[]; resolvers?: Resolvers}) { @@ -119,6 +125,8 @@ describe('EvaluateScheduleTest', () => { createHref: createHrefSpy, }); + (useTrackEvent as jest.Mock).mockReturnValue(jest.fn()); + render( ({ useHistory: jest.fn(), })); +// Mocking useTrackEvent +jest.mock('../../app/analytics', () => ({ + useTrackEvent: jest.fn(() => jest.fn()), +})); + const onCloseMock = jest.fn(); function Test({mocks, resolvers}: {mocks?: MockedResponse[]; resolvers?: Resolvers}) { @@ -98,6 +104,8 @@ describe('SensorDryRunTest', () => { createHref: createHrefSpy, }); + (useTrackEvent as jest.Mock).mockReturnValue(jest.fn()); + render( { return ( - + <> + + + ); }; diff --git a/js_modules/dagster-ui/packages/ui-core/src/util/flattenOneLevel.tsx b/js_modules/dagster-ui/packages/ui-core/src/util/flattenOneLevel.tsx new file mode 100644 index 0000000000000..18c8b12347bd5 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/util/flattenOneLevel.tsx @@ -0,0 +1,10 @@ +/** + * Flattens a two-dimensional array into a one-dimensional array. + * + * @param nodeChunks - The two-dimensional array to flatten. + * @returns The flattened one-dimensional array. 
+ */ +// https://jsbench.me/o8kqzo8olz/1 +export function flattenOneLevel(arrays: T[][]) { + return ([] as T[]).concat(...arrays); +} diff --git a/js_modules/dagster-ui/yarn.lock b/js_modules/dagster-ui/yarn.lock index 0db59a3eab6db..274ba8bc40f8d 100644 --- a/js_modules/dagster-ui/yarn.lock +++ b/js_modules/dagster-ui/yarn.lock @@ -3540,7 +3540,7 @@ __metadata: eslint-webpack-plugin: "npm:3.1.1" file-loader: "npm:^6.2.0" graphql: "npm:^16.8.1" - next: "npm:^14.2.10" + next: "npm:^14.2.15" prettier: "npm:^3.3.3" react: "npm:^18.3.1" react-dom: "npm:^18.3.1" @@ -5665,10 +5665,10 @@ __metadata: languageName: node linkType: hard -"@next/env@npm:14.2.12": - version: 14.2.12 - resolution: "@next/env@npm:14.2.12" - checksum: 10/9e1f36da7d794a29db42ebc68e24cc7ab19ab2d1fd86d6cdf872fac0f56cbce97d6df9ff43f526ec083c505feea716b86668c7fcc410d809ad136bb656a45d03 +"@next/env@npm:14.2.20": + version: 14.2.20 + resolution: "@next/env@npm:14.2.20" + checksum: 10/3aaf2ba16344d7cede12a846859fddffa172e951f2dc28bb66f8b7c24cb2c207d2a49c84fea965ae964714aeb2269cff7a91723b57631765f78fd02b9465d1f2 languageName: node linkType: hard @@ -5681,65 +5681,65 @@ __metadata: languageName: node linkType: hard -"@next/swc-darwin-arm64@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-darwin-arm64@npm:14.2.12" +"@next/swc-darwin-arm64@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-darwin-arm64@npm:14.2.20" conditions: os=darwin & cpu=arm64 languageName: node linkType: hard -"@next/swc-darwin-x64@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-darwin-x64@npm:14.2.12" +"@next/swc-darwin-x64@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-darwin-x64@npm:14.2.20" conditions: os=darwin & cpu=x64 languageName: node linkType: hard -"@next/swc-linux-arm64-gnu@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-arm64-gnu@npm:14.2.12" +"@next/swc-linux-arm64-gnu@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-linux-arm64-gnu@npm:14.2.20" conditions: os=linux & cpu=arm64 & libc=glibc languageName: node linkType: hard -"@next/swc-linux-arm64-musl@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-arm64-musl@npm:14.2.12" +"@next/swc-linux-arm64-musl@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-linux-arm64-musl@npm:14.2.20" conditions: os=linux & cpu=arm64 & libc=musl languageName: node linkType: hard -"@next/swc-linux-x64-gnu@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-x64-gnu@npm:14.2.12" +"@next/swc-linux-x64-gnu@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-linux-x64-gnu@npm:14.2.20" conditions: os=linux & cpu=x64 & libc=glibc languageName: node linkType: hard -"@next/swc-linux-x64-musl@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-linux-x64-musl@npm:14.2.12" +"@next/swc-linux-x64-musl@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-linux-x64-musl@npm:14.2.20" conditions: os=linux & cpu=x64 & libc=musl languageName: node linkType: hard -"@next/swc-win32-arm64-msvc@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-win32-arm64-msvc@npm:14.2.12" +"@next/swc-win32-arm64-msvc@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-win32-arm64-msvc@npm:14.2.20" conditions: os=win32 & cpu=arm64 languageName: node linkType: hard -"@next/swc-win32-ia32-msvc@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-win32-ia32-msvc@npm:14.2.12" +"@next/swc-win32-ia32-msvc@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-win32-ia32-msvc@npm:14.2.20" conditions: os=win32 & cpu=ia32 
languageName: node linkType: hard -"@next/swc-win32-x64-msvc@npm:14.2.12": - version: 14.2.12 - resolution: "@next/swc-win32-x64-msvc@npm:14.2.12" +"@next/swc-win32-x64-msvc@npm:14.2.20": + version: 14.2.20 + resolution: "@next/swc-win32-x64-msvc@npm:14.2.20" conditions: os=win32 & cpu=x64 languageName: node linkType: hard @@ -17811,20 +17811,20 @@ __metadata: languageName: node linkType: hard -"next@npm:^14.2.10": - version: 14.2.12 - resolution: "next@npm:14.2.12" +"next@npm:^14.2.15": + version: 14.2.20 + resolution: "next@npm:14.2.20" dependencies: - "@next/env": "npm:14.2.12" - "@next/swc-darwin-arm64": "npm:14.2.12" - "@next/swc-darwin-x64": "npm:14.2.12" - "@next/swc-linux-arm64-gnu": "npm:14.2.12" - "@next/swc-linux-arm64-musl": "npm:14.2.12" - "@next/swc-linux-x64-gnu": "npm:14.2.12" - "@next/swc-linux-x64-musl": "npm:14.2.12" - "@next/swc-win32-arm64-msvc": "npm:14.2.12" - "@next/swc-win32-ia32-msvc": "npm:14.2.12" - "@next/swc-win32-x64-msvc": "npm:14.2.12" + "@next/env": "npm:14.2.20" + "@next/swc-darwin-arm64": "npm:14.2.20" + "@next/swc-darwin-x64": "npm:14.2.20" + "@next/swc-linux-arm64-gnu": "npm:14.2.20" + "@next/swc-linux-arm64-musl": "npm:14.2.20" + "@next/swc-linux-x64-gnu": "npm:14.2.20" + "@next/swc-linux-x64-musl": "npm:14.2.20" + "@next/swc-win32-arm64-msvc": "npm:14.2.20" + "@next/swc-win32-ia32-msvc": "npm:14.2.20" + "@next/swc-win32-x64-msvc": "npm:14.2.20" "@swc/helpers": "npm:0.5.5" busboy: "npm:1.6.0" caniuse-lite: "npm:^1.0.30001579" @@ -17865,7 +17865,7 @@ __metadata: optional: true bin: next: dist/bin/next - checksum: 10/4dcae15547930cdaeb8a1d935dec3ab0c82a65347b0835988fd70fa5b108f1c301b75f98acf063c253858719e2969301fb2b0c30d6b2a46086ec19419430b119 + checksum: 10/baddcaeffa82e321cda87ad727540fc8ad639af5439ccc69b349c2b9a4315244d55c4aeed391c7bcd79edd634d6410b9e4a718ca02cc9e910046960444bb0c64 languageName: node linkType: hard diff --git a/pyright/alt-1/requirements-pinned.txt b/pyright/alt-1/requirements-pinned.txt index 6865024a2edaf..9c55cd90ee281 100644 --- a/pyright/alt-1/requirements-pinned.txt +++ b/pyright/alt-1/requirements-pinned.txt @@ -4,7 +4,7 @@ aiofile==3.9.0 aiohappyeyeballs==2.4.4 aiohttp==3.11.10 aioitertools==0.12.0 -aiosignal==1.3.1 +aiosignal==1.3.2 alembic==1.14.0 annotated-types==0.7.0 antlr4-python3-runtime==4.13.2 @@ -18,7 +18,7 @@ asn1crypto==1.5.1 astroid==3.3.6 asttokens==3.0.0 async-lru==2.0.4 -attrs==24.2.0 +attrs==24.3.0 babel==2.16.0 backoff==2.2.1 backports-tarfile==1.2.0 @@ -27,11 +27,11 @@ bleach==6.2.0 boto3==1.35.36 boto3-stubs-lite==1.35.70 botocore==1.35.36 -botocore-stubs==1.35.78 +botocore-stubs==1.35.82 buildkite-test-collector==0.1.9 cachetools==5.5.0 caio==0.9.17 -certifi==2024.8.30 +certifi==2024.12.14 cffi==1.17.1 chardet==5.2.0 charset-normalizer==3.4.0 @@ -41,7 +41,7 @@ coloredlogs==14.0 comm==0.2.2 contourpy==1.3.1 coverage==7.6.9 -croniter==3.0.4 +croniter==5.0.1 cryptography==44.0.0 cycler==0.12.1 daff==1.3.46 @@ -73,7 +73,7 @@ dbt-duckdb==1.9.1 dbt-extractor==0.5.1 dbt-semantic-interfaces==0.5.1 dbt-snowflake==1.9.0 -debugpy==1.8.9 +debugpy==1.8.11 decopatch==1.4.10 decorator==5.1.1 deepdiff==7.0.1 @@ -95,8 +95,8 @@ frozenlist==1.5.0 fsspec==2024.3.0 gcsfs==0.8.0 google-api-core==2.24.0 -google-api-python-client==2.154.0 -google-auth==2.36.0 +google-api-python-client==2.155.0 +google-auth==2.37.0 google-auth-httplib2==0.2.0 google-auth-oauthlib==1.2.1 google-cloud-bigquery==3.27.0 @@ -141,7 +141,7 @@ jsonschema==4.23.0 jsonschema-specifications==2024.10.1 jupyter-client==8.6.3 jupyter-core==5.7.2 
-jupyter-events==0.10.0 +jupyter-events==0.11.0 jupyter-lsp==2.2.5 jupyter-server==2.14.2 jupyter-server-terminals==0.5.3 @@ -157,7 +157,7 @@ mako==1.3.8 markdown-it-py==3.0.0 markupsafe==3.0.2 mashumaro==3.14 -matplotlib==3.9.3 +matplotlib==3.10.0 matplotlib-inline==0.1.7 mccabe==0.7.0 mdurl==0.1.2 @@ -171,10 +171,10 @@ multimethod==1.12 mypy==1.13.0 mypy-boto3-ecs==1.35.77 mypy-boto3-emr==1.35.68 -mypy-boto3-emr-serverless==1.35.25 -mypy-boto3-glue==1.35.74 -mypy-boto3-logs==1.35.72 -mypy-boto3-s3==1.35.76.post1 +mypy-boto3-emr-serverless==1.35.79 +mypy-boto3-glue==1.35.80 +mypy-boto3-logs==1.35.81 +mypy-boto3-s3==1.35.81 mypy-extensions==1.0.0 mypy-protobuf==3.6.0 nbclient==0.10.1 @@ -231,14 +231,14 @@ pyproject-api==1.8.0 pyright==1.1.379 pyspark==3.5.3 pytest==8.3.4 -pytest-asyncio==0.24.0 +pytest-asyncio==0.25.0 pytest-cases==3.8.6 pytest-cov==5.0.0 pytest-mock==3.14.0 pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-json-logger==2.0.7 +python-json-logger==3.2.1 python-slugify==8.0.4 pytimeparse==1.1.8 pytz==2024.2 @@ -275,7 +275,7 @@ sqlglot==26.0.0 sqlglotrs==0.3.0 sqlparse==0.5.3 stack-data==0.6.3 -starlette==0.41.3 +starlette==0.42.0 structlog==24.4.0 syrupy==4.8.0 tabulate==0.9.0 @@ -292,7 +292,7 @@ tqdm==4.67.1 traitlets==5.14.3 typeguard==4.4.1 typer==0.15.1 -types-awscrt==0.23.4 +types-awscrt==0.23.5 types-backports==0.1.3 types-certifi==2021.10.8.3 types-cffi==1.16.0.20240331 @@ -318,11 +318,11 @@ typing-extensions==4.12.2 typing-inspect==0.9.0 tzdata==2024.2 ujson==5.10.0 -universal-pathlib==0.2.5 +universal-pathlib==0.2.6 uri-template==1.3.0 uritemplate==4.1.1 urllib3==2.2.3 -uvicorn==0.32.1 +uvicorn==0.34.0 uvloop==0.21.0 virtualenv==20.28.0 watchdog==5.0.3 diff --git a/pyright/master/requirements-pinned.txt b/pyright/master/requirements-pinned.txt index 18552e740a2a3..f3383f88c3e96 100644 --- a/pyright/master/requirements-pinned.txt +++ b/pyright/master/requirements-pinned.txt @@ -5,7 +5,7 @@ aiohappyeyeballs==2.4.4 aiohttp==3.10.11 aiohttp-retry==2.8.3 aioresponses==0.7.7 -aiosignal==1.3.1 +aiosignal==1.3.2 alabaster==1.0.0 alembic==1.14.0 altair==4.2.2 @@ -36,7 +36,8 @@ asn1crypto==1.5.1 -e examples/assets_pandas_pyspark asttokens==3.0.0 async-lru==2.0.4 -attrs==24.2.0 +atproto==0.0.56 +attrs==24.3.0 autodocsumm==0.2.14 autoflake==2.3.1 -e python_modules/automation @@ -57,10 +58,10 @@ billiard==4.2.1 bleach==6.2.0 blinker==1.9.0 bokeh==3.6.2 -boto3==1.35.78 +boto3==1.35.82 boto3-stubs-lite==1.35.70 -botocore==1.35.78 -botocore-stubs==1.35.78 +botocore==1.35.82 +botocore-stubs==1.35.82 buildkite-test-collector==0.1.9 cachecontrol==0.14.1 cached-property==2.0.1 @@ -70,9 +71,9 @@ caio==0.9.17 callee==0.3.1 cattrs==23.1.2 celery==5.4.0 -certifi==2024.8.30 +certifi==2024.12.14 cffi==1.17.1 -cfn-lint==1.22.0 +cfn-lint==1.22.2 chardet==5.2.0 charset-normalizer==3.4.0 click==8.1.7 @@ -93,8 +94,8 @@ connexion==2.14.2 contourpy==1.3.1 coverage==7.6.9 cron-descriptor==1.4.5 -croniter==3.0.4 -cryptography==44.0.0 +croniter==5.0.1 +cryptography==43.0.3 cssselect==1.2.0 cssutils==2.11.1 cycler==0.12.1 @@ -121,6 +122,7 @@ dagster-contrib-modal==0.0.2 -e python_modules/libraries/dagster-deltalake -e python_modules/libraries/dagster-deltalake-pandas -e python_modules/libraries/dagster-deltalake-polars +-e python_modules/libraries/dagster-dg -e examples/experimental/dagster-dlift -e python_modules/libraries/dagster-docker -e python_modules/libraries/dagster-duckdb @@ -189,7 +191,7 @@ dbt-duckdb==1.9.1 -e examples/starlift-demo dbt-extractor==0.5.1 
dbt-semantic-interfaces==0.5.1 -debugpy==1.8.9 +debugpy==1.8.11 decopatch==1.4.10 decorator==5.1.1 deepdiff==7.0.1 @@ -221,7 +223,7 @@ execnet==2.1.1 executing==2.1.0 expandvars==0.12.0 faiss-cpu==1.8.0 -fastapi==0.115.6 +fastapi==0.1.17 fastavro==1.9.7 fastjsonschema==2.21.1 -e examples/feature_graph_backed_assets @@ -250,8 +252,8 @@ gitdb==4.0.11 gitpython==3.1.43 giturlparse==0.12.0 google-api-core==2.24.0 -google-api-python-client==2.154.0 -google-auth==2.36.0 +google-api-python-client==2.155.0 +google-auth==2.37.0 google-auth-httplib2==0.2.0 google-auth-oauthlib==1.2.1 google-cloud-bigquery==3.27.0 @@ -281,7 +283,7 @@ html5lib==1.1 httpcore==1.0.7 httplib2==0.22.0 httptools==0.6.4 -httpx==0.28.1 +httpx==0.27.2 httpx-sse==0.4.0 humanfriendly==10.0 humanize==4.11.0 @@ -321,7 +323,7 @@ jsonschema-path==0.3.3 jsonschema-specifications==2023.12.1 jupyter-client==7.4.9 jupyter-core==5.7.2 -jupyter-events==0.10.0 +jupyter-events==0.11.0 jupyter-lsp==2.2.5 jupyter-server==2.14.2 jupyter-server-terminals==0.5.3 @@ -333,17 +335,18 @@ keyring==25.5.0 -e python_modules/libraries/dagster-airlift/kitchen-sink kiwisolver==1.4.7 kombu==5.4.2 -kopf==1.37.3 +kopf==1.37.4 kubernetes==31.0.0 kubernetes-asyncio==31.1.1 langchain==0.3.7 langchain-community==0.3.5 -langchain-core==0.3.24 +langchain-core==0.3.25 langchain-openai==0.2.5 -langchain-text-splitters==0.3.2 +langchain-text-splitters==0.3.3 langsmith==0.1.147 lazy-object-proxy==1.10.0 leather==0.4.0 +libipld==3.0.0 limits==3.14.1 linkify-it-py==2.0.3 lkml==1.3.6 @@ -361,7 +364,7 @@ marshmallow==3.23.1 marshmallow-oneofschema==3.1.1 marshmallow-sqlalchemy==0.26.1 mashumaro==3.15 -matplotlib==3.9.3 +matplotlib==3.10.0 matplotlib-inline==0.1.3 mbstrdecoder==1.1.3 mdit-py-plugins==0.4.2 @@ -370,7 +373,7 @@ minimal-snowplow-tracker==0.0.2 mistune==3.0.2 mixpanel==4.10.1 mlflow==1.27.0 -modal==0.67.46 +modal==0.68.26 more-itertools==10.5.0 morefs==0.2.2 moto==4.2.14 @@ -382,10 +385,10 @@ multidict==6.1.0 multimethod==1.12 mypy-boto3-ecs==1.35.77 mypy-boto3-emr==1.35.68 -mypy-boto3-emr-serverless==1.35.25 -mypy-boto3-glue==1.35.74 -mypy-boto3-logs==1.35.72 -mypy-boto3-s3==1.35.76.post1 +mypy-boto3-emr-serverless==1.35.79 +mypy-boto3-glue==1.35.80 +mypy-boto3-logs==1.35.81 +mypy-boto3-s3==1.35.81 mypy-extensions==1.0.0 mypy-protobuf==3.6.0 mysql-connector-python==9.1.0 @@ -395,7 +398,7 @@ nbconvert==7.16.4 nbformat==5.10.4 nest-asyncio==1.6.0 networkx==3.4.2 -nh3==0.2.19 +nh3==0.2.20 nodeenv==1.9.1 notebook==7.3.1 notebook-shim==0.2.4 @@ -406,7 +409,7 @@ objgraph==3.6.2 onnx==1.17.0 onnxconverter-common==1.13.0 onnxruntime==1.20.1 -openai==1.57.2 +openai==1.57.4 openapi-schema-validator==0.6.2 openapi-spec-validator==0.7.1 opentelemetry-api==1.29.0 @@ -451,6 +454,7 @@ portalocker==2.10.1 premailer==3.10.0 prison==0.2.1 progressbar2==4.5.0 +-e examples/project_atproto_dashboard -e examples/project_dagster_modal_pipes prometheus-client==0.21.1 prometheus-flask-exporter==0.23.1 @@ -471,7 +475,7 @@ pyasn1-modules==0.4.1 pycparser==2.22 pydantic==2.10.3 pydantic-core==2.27.1 -pydantic-settings==2.6.1 +pydantic-settings==2.7.0 pydata-google-auth==1.9.0 pyflakes==3.2.0 pygments==2.18.0 @@ -487,7 +491,7 @@ pysocks==1.7.1 pyspark==3.5.3 pytablereader==0.31.4 pytest==8.3.4 -pytest-asyncio==0.24.0 +pytest-asyncio==0.25.0 pytest-cases==3.8.6 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -497,9 +501,9 @@ python-dateutil==2.9.0.post0 python-dotenv==1.0.1 python-frontmatter==1.1.0 python-jose==3.3.0 -python-json-logger==2.0.7 +python-json-logger==3.2.1 python-liquid==1.12.1 
-python-multipart==0.0.19 +python-multipart==0.0.20 python-nvd3==0.16.0 python-slugify==8.0.4 python-utils==3.9.1 @@ -529,7 +533,7 @@ rpds-py==0.22.3 rsa==4.9 ruamel-yaml==0.17.40 ruamel-yaml-clib==0.2.12 -ruff==0.8.2 +ruff==0.8.3 s3transfer==0.10.4 scikit-learn==1.6.0 scipy==1.14.1 @@ -550,8 +554,8 @@ six==1.17.0 skein==0.8.2 skl2onnx==1.17.0 slack-sdk==3.33.5 -sling==1.3.2 -sling-mac-arm64==1.3.2 +sling==1.3.3 +sling-mac-arm64==1.3.3 smmap==5.0.1 sniffio==1.3.1 snowballstemmer==2.2.0 @@ -580,10 +584,10 @@ sqlparse==0.5.3 sshpubkeys==3.3.1 sshtunnel==0.4.0 stack-data==0.6.3 -starlette==0.41.3 +starlette==0.42.0 structlog==24.4.0 sympy==1.13.1 -synchronicity==0.9.5 +synchronicity==0.9.6 syrupy==4.8.0 tableauserverclient==0.34 tabledata==1.3.3 @@ -611,12 +615,12 @@ trio==0.27.0 trio-websocket==0.11.1 -e examples/airlift-migration-tutorial -e examples/tutorial_notebook_assets -twilio==9.3.8 +twilio==9.4.1 twine==6.0.1 typeguard==4.4.1 typepy==1.3.2 typer==0.15.1 -types-awscrt==0.23.4 +types-awscrt==0.23.5 types-backports==0.1.3 types-certifi==2021.10.8.3 types-cffi==1.16.0.20240331 @@ -645,23 +649,23 @@ tzdata==2024.2 tzlocal==5.2 uc-micro-py==1.0.3 unicodecsv==0.14.1 -universal-pathlib==0.2.5 +universal-pathlib==0.2.6 uri-template==1.3.0 uritemplate==4.1.1 urllib3==2.2.3 -e examples/use_case_repository -uvicorn==0.32.1 +uvicorn==0.34.0 uvloop==0.21.0 vine==5.1.0 virtualenv==20.28.0 -wandb==0.19.0 +wandb==0.19.1 watchdog==5.0.3 watchfiles==1.0.3 wcwidth==0.2.13 webcolors==24.11.1 webencodings==0.5.1 websocket-client==1.8.0 -websockets==14.1 +websockets==13.1 werkzeug==2.2.3 wheel==0.45.1 widgetsnbextension==4.0.13 diff --git a/pyright/master/requirements.txt b/pyright/master/requirements.txt index fe9ac899fec0a..1d9b6d64ff895 100644 --- a/pyright/master/requirements.txt +++ b/pyright/master/requirements.txt @@ -52,6 +52,7 @@ -e python_modules/libraries/dagster-deltalake/ -e python_modules/libraries/dagster-deltalake-pandas/ -e python_modules/libraries/dagster-deltalake-polars/ +-e python_modules/libraries/dagster-dg/ -e python_modules/libraries/dagster-docker/ -e python_modules/libraries/dagster-duckdb/ -e python_modules/libraries/dagster-duckdb-pandas/ @@ -141,4 +142,5 @@ types-sqlalchemy==1.4.53.34 -e python_modules/libraries/dagster-airlift/perf-harness -e examples/airlift-migration-tutorial -e examples/use_case_repository[dev] +-e examples/project_atproto_dashboard -e examples/project_dagster_modal_pipes diff --git a/python_modules/automation/automation_tests/test_repo.py b/python_modules/automation/automation_tests/test_repo.py index e7e2aaabe7df7..1e3cc83667b36 100644 --- a/python_modules/automation/automation_tests/test_repo.py +++ b/python_modules/automation/automation_tests/test_repo.py @@ -2,9 +2,10 @@ import subprocess from pathlib import Path -# Some libraries are excluded because they lack a Dagster dependency, which is a prerequisite for -# registering in the DagsterLibraryRegistry. -EXCLUDE_LIBRARIES = ["dagster-dg"] +# Some libraries are excluded because they either: +# - lack a Dagster dependency, which is a prerequisite for registering in the DagsterLibraryRegistry. +# - are temporary or on a separate release schedule from the rest of the libraries. 
+EXCLUDE_LIBRARIES = ["dagster-components", "dagster-dg"] def test_all_libraries_register() -> None: diff --git a/python_modules/dagster-graphql/dagster_graphql/implementation/events.py b/python_modules/dagster-graphql/dagster_graphql/implementation/events.py index a49b91447141a..7e8fd1be4dca6 100644 --- a/python_modules/dagster-graphql/dagster_graphql/implementation/events.py +++ b/python_modules/dagster-graphql/dagster_graphql/implementation/events.py @@ -232,6 +232,7 @@ def from_dagster_event_record(event_record: EventLogEntry, pipeline_name: str) - GrapheneExecutionStepStartEvent, GrapheneExecutionStepSuccessEvent, GrapheneExecutionStepUpForRetryEvent, + GrapheneExpectationResult, GrapheneHandledOutputEvent, GrapheneHookCompletedEvent, GrapheneHookErroredEvent, @@ -306,7 +307,7 @@ def from_dagster_event_record(event_record: EventLogEntry, pipeline_name: str) - elif dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT: data = cast(StepExpectationResultData, dagster_event.event_specific_data) return GrapheneStepExpectationResultEvent( - expectation_result=data.expectation_result, **basic_params + expectation_result=GrapheneExpectationResult(data.expectation_result), **basic_params ) elif dagster_event.event_type == DagsterEventType.STEP_FAILURE: data = dagster_event.step_failure_data diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/logs/events.py b/python_modules/dagster-graphql/dagster_graphql/schema/logs/events.py index 24e54116e4e0e..46a1dbc8bf0e2 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/logs/events.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/logs/events.py @@ -2,6 +2,7 @@ import dagster._check as check import graphene +from dagster._core.definitions import ExpectationResult from dagster._core.events import AssetLineageInfo, DagsterEventType from dagster._core.events.log import EventLogEntry from dagster._core.execution.plan.objects import ErrorSource @@ -209,10 +210,18 @@ class Meta: interfaces = (GrapheneDisplayableEvent,) name = "ExpectationResult" + def __init__(self, expectation_result: ExpectationResult): + self._expectation_result = expectation_result + super().__init__( + success=expectation_result.success, + label=expectation_result.label, + description=expectation_result.description, + ) + def resolve_metadataEntries(self, _graphene_info: ResolveInfo): from dagster_graphql.implementation.events import _to_metadata_entries - return _to_metadata_entries(self.metadata) + return _to_metadata_entries(self._expectation_result.metadata) class GrapheneTypeCheck(graphene.ObjectType): @@ -635,8 +644,12 @@ def __init__(self, stats): status=stats.status.value, startTime=stats.start_time, endTime=stats.end_time, - materializations=stats.materialization_events, - expectationResults=stats.expectation_results, + materializations=[ + GrapheneMaterializationEvent(event) for event in stats.materialization_events + ], + expectationResults=[ + GrapheneExpectationResult(result) for result in stats.expectation_results + ], attempts=[ GrapheneRunMarker(startTime=attempt.start_time, endTime=attempt.end_time) for attempt in stats.attempts_list diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/solids.py b/python_modules/dagster-graphql/dagster_graphql/schema/solids.py index c6ce0836b408e..0a1a58b15133a 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/solids.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/solids.py @@ -373,7 +373,7 @@ class 
GrapheneISolidDefinition(graphene.Interface): metadata = non_null_list(GrapheneMetadataItemDefinition) input_definitions = non_null_list(GrapheneInputDefinition) output_definitions = non_null_list(GrapheneOutputDefinition) - asset_nodes = non_null_list("dagster_graphql.schema.asset_graph.GrapheneAssetNode") + assetNodes = non_null_list("dagster_graphql.schema.asset_graph.GrapheneAssetNode") class Meta: name = "ISolidDefinition" @@ -424,10 +424,7 @@ def resolve_output_definitions(self, _graphene_info): for output_def_snap in self._solid_def_snap.output_def_snaps ] - def resolve_asset_nodes(self, graphene_info: ResolveInfo) -> Sequence["GrapheneAssetNode"]: - # NOTE: This is a temporary hack. We really should prob be resolving solids against the repo - # rather than pipeline, that way we would not have to refetch the repo here here in order to - # access the asset nodes. + def resolve_assetNodes(self, graphene_info: ResolveInfo) -> Sequence["GrapheneAssetNode"]: from dagster_graphql.schema.asset_graph import GrapheneAssetNode # This is a workaround for the fact that asset info is not persisted in pipeline snapshots. @@ -435,13 +432,12 @@ def resolve_asset_nodes(self, graphene_info: ResolveInfo) -> Sequence["GrapheneA return [] else: assert isinstance(self._represented_pipeline, RemoteJob) - repo_handle = self._represented_pipeline.repository_handle - origin = repo_handle.code_location_origin - location = graphene_info.context.get_code_location(origin.location_name) - ext_repo = location.get_repository(repo_handle.repository_name) + job_asset_nodes = graphene_info.context.get_assets_in_job( + self._represented_pipeline.handle.to_selector() + ) remote_nodes = [ remote_node - for remote_node in ext_repo.asset_graph.asset_nodes + for remote_node in job_asset_nodes if ( (remote_node.asset_node_snap.node_definition_name == self.solid_def_name) or ( @@ -450,21 +446,20 @@ def resolve_asset_nodes(self, graphene_info: ResolveInfo) -> Sequence["GrapheneA ) ) ] - + differ = None base_deployment_context = graphene_info.context.get_base_deployment_context() + if base_deployment_context: + differ = AssetGraphDiffer.from_remote_repositories( + code_location_name=self._represented_pipeline.handle.location_name, + repository_name=self._represented_pipeline.handle.repository_name, + branch_workspace=graphene_info.context, + base_workspace=base_deployment_context, + ) return [ GrapheneAssetNode( remote_node=remote_node, - # base_deployment_context will be None if we are not in a branch deployment - asset_graph_differ=AssetGraphDiffer.from_remote_repositories( - code_location_name=location.name, - repository_name=ext_repo.name, - branch_workspace=graphene_info.context, - base_workspace=base_deployment_context, - ) - if base_deployment_context is not None - else None, + asset_graph_differ=differ, ) for remote_node in remote_nodes ] diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_assets.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_assets.py index 0901f0b9af9da..06c3943b75d91 100644 --- a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_assets.py +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_assets.py @@ -818,6 +818,29 @@ } """ +GET_MATERIALIZATIONS_FROM_STEP_STATS = """ +query MaterializationsFromStepStatsQuery($runId: ID!) { + runOrError(runId: $runId) { + ... on PythonError { + className + message + stack + } + ... 
on Run { + stepStats { + materializations { + eventType + message + assetLineage { + partitions + } + } + } + } + } +} +""" + def _create_run( graphql_context: WorkspaceRequestContext, @@ -2016,6 +2039,14 @@ def test_get_run_materialization(self, graphql_context: WorkspaceRequestContext, assert len(result.data["runsOrError"]["results"][0]["assetMaterializations"]) == 1 snapshot.assert_match(result.data) + def test_get_materializations_from_step_stats(self, graphql_context: WorkspaceRequestContext): + run_id = _create_run(graphql_context, "single_asset_job") + result = execute_dagster_graphql( + graphql_context, GET_MATERIALIZATIONS_FROM_STEP_STATS, {"runId": run_id} + ) + assert result.data + assert len(result.data["runOrError"]["stepStats"][0]["materializations"]) == 1 + def test_asset_selection_in_run(self, graphql_context: WorkspaceRequestContext): # Generate materializations for bar asset run_id = _create_run(graphql_context, "foo_job", asset_selection=[{"path": ["bar"]}]) diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_expectations.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_expectations.py index bb18319c94deb..2746be44f06c3 100644 --- a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_expectations.py +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_expectations.py @@ -1,14 +1,68 @@ import json +from dagster._core.test_utils import poll_for_finished_run from dagster._core.workspace.context import WorkspaceRequestContext from dagster._utils import file_relative_path -from dagster_graphql.test.utils import infer_job_selector +from dagster_graphql.client.query import LAUNCH_PIPELINE_EXECUTION_MUTATION +from dagster_graphql.test.utils import execute_dagster_graphql, infer_job_selector from dagster_graphql_tests.graphql.graphql_context_test_suite import ( ExecutingGraphQLContextTestMatrix, ) from dagster_graphql_tests.graphql.utils import sync_execute_get_events +GET_EXPECTATIONS_FROM_STEP_STATS = """ +query MaterializationsFromStepStatsQuery($runId: ID!) { + runOrError(runId: $runId) { + ... on PythonError { + className + message + stack + } + ... on Run { + stepStats { + expectationResults { + success + label + description + metadataEntries { + ... on TextMetadataEntry { + text + } + ... 
on JsonMetadataEntry { + jsonString + } + } + } + } + } + } +} +""" + + +def _create_run( + graphql_context: WorkspaceRequestContext, + pipeline_name: str, +) -> str: + selector = infer_job_selector( + graphql_context, + pipeline_name, + ) + result = execute_dagster_graphql( + graphql_context, + LAUNCH_PIPELINE_EXECUTION_MUTATION, + variables={ + "executionParams": { + "selector": selector, + } + }, + ) + assert result.data["launchPipelineExecution"]["__typename"] == "LaunchRunSuccess" + run_id = result.data["launchPipelineExecution"]["run"]["runId"] + poll_for_finished_run(graphql_context.instance, run_id) + return run_id + def get_expectation_results(logs, op_name: str): def _f(): @@ -70,6 +124,28 @@ def test_basic_expectations_within_compute_step_events( get_expectation_results(logs, "emit_successful_expectation_no_metadata") ) + def test_get_expectation_results_from_step_stats( + self, graphql_context: WorkspaceRequestContext + ): + run_id = _create_run(graphql_context, "job_with_expectations") + result = execute_dagster_graphql( + graphql_context, GET_EXPECTATIONS_FROM_STEP_STATS, {"runId": run_id} + ) + assert result.data + assert any( + len(step["expectationResults"]) > 0 + and step["expectationResults"][0] + == { + "success": False, + "label": "always_false", + "description": "Failure", + "metadataEntries": [ + {"jsonString": json.dumps({"reason": "Relentless pessimism."})} + ], + } + for step in result.data["runOrError"]["stepStats"] + ) + def test_basic_input_output_expectations( self, graphql_context: WorkspaceRequestContext, snapshot ): diff --git a/python_modules/dagster/dagster/_core/definitions/antlr_asset_selection/antlr_asset_selection.py b/python_modules/dagster/dagster/_core/definitions/antlr_asset_selection/antlr_asset_selection.py index 0ef314d766fed..28ac370a3d30e 100644 --- a/python_modules/dagster/dagster/_core/definitions/antlr_asset_selection/antlr_asset_selection.py +++ b/python_modules/dagster/dagster/_core/definitions/antlr_asset_selection/antlr_asset_selection.py @@ -108,7 +108,7 @@ def visitKeySubstringExpr(self, ctx: AssetSelectionParser.KeySubstringExprContex def visitTagAttributeExpr(self, ctx: AssetSelectionParser.TagAttributeExprContext): key = self.visit(ctx.value(0)) value = self.visit(ctx.value(1)) if ctx.EQUAL() else "" - return AssetSelection.tag(key, value) + return AssetSelection.tag(key, value, include_sources=self.include_sources) def visitOwnerAttributeExpr(self, ctx: AssetSelectionParser.OwnerAttributeExprContext): owner = self.visit(ctx.value()) @@ -116,11 +116,11 @@ def visitOwnerAttributeExpr(self, ctx: AssetSelectionParser.OwnerAttributeExprCo def visitGroupAttributeExpr(self, ctx: AssetSelectionParser.GroupAttributeExprContext): group = self.visit(ctx.value()) - return AssetSelection.groups(group) + return AssetSelection.groups(group, include_sources=self.include_sources) def visitKindAttributeExpr(self, ctx: AssetSelectionParser.KindAttributeExprContext): kind = self.visit(ctx.value()) - return AssetSelection.tag(f"{KIND_PREFIX}{kind}", "") + return AssetSelection.tag(f"{KIND_PREFIX}{kind}", "", include_sources=self.include_sources) def visitCodeLocationAttributeExpr( self, ctx: AssetSelectionParser.CodeLocationAttributeExprContext diff --git a/python_modules/dagster/dagster/_core/definitions/asset_check_factories/freshness_checks/sensor.py b/python_modules/dagster/dagster/_core/definitions/asset_check_factories/freshness_checks/sensor.py index 943e9b8094f01..34bb66266296f 100644 --- 
a/python_modules/dagster/dagster/_core/definitions/asset_check_factories/freshness_checks/sensor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_check_factories/freshness_checks/sensor.py @@ -19,6 +19,7 @@ SensorEvaluationContext, ) from dagster._core.storage.asset_check_execution_record import AssetCheckExecutionRecordStatus +from dagster._core.storage.tags import SENSOR_NAME_TAG from dagster._time import get_current_datetime, get_current_timestamp DEFAULT_FRESHNESS_SENSOR_NAME = "freshness_checks_sensor" @@ -162,42 +163,105 @@ def freshness_checks_get_evaluations_iter( yield check_key, True continue - # Case 2: The check is currently evaluating. We shouldn't kick off another evaluation until it's done. - if ( - summary_record.last_check_execution_record.status - == AssetCheckExecutionRecordStatus.PLANNED - ): - context.log.info( - f"Freshness check on asset {check_key.asset_key.to_user_string()} is in the planned state, indicating it is currently evaluating. Skipping..." + # Case 2: The check is currently evaluating and has never previously evaluated. We shouldn't kick off another evaluation until it's done. + if summary_record.last_completed_check_execution_record is None: + check.invariant( + summary_record.last_check_execution_record.status + == AssetCheckExecutionRecordStatus.PLANNED, + f"Unexpected status for check {check_key.to_user_string()}. The summary record indicates that the check has never completed, but the last check execution record is in terminal state {summary_record.last_check_execution_record.status}. This is likely a framework error, please report this to the Dagster maintainers.", ) - yield check_key, False - continue - evaluation = check.not_none( - summary_record.last_check_execution_record.event - ).asset_check_evaluation - # Case 3: The check previously failed. We shouldn't kick off another evaluation until the asset has been updated. - if not evaluation or not evaluation.passed: context.log.info( - f"Freshness check {check_key.to_user_string()} failed its last evaluation. Waiting " - "to re-evaluate until the asset has received an update." + f"Freshness check on asset {check_key.asset_key.to_user_string()} is currently evaluating for the first time. Skipping..." ) yield check_key, False continue - # Case 4: The check previously passed. We should re-evaluate if it's possible for the check to be overdue again. - next_deadline = cast(float, evaluation.metadata[FRESH_UNTIL_METADATA_KEY].value) - if next_deadline < start_time.timestamp(): - context.log.info( - f"Freshness check {check_key.to_user_string()} previously passed, but " - "enough time has passed that it can be overdue again. Adding to run request." + + latest_completed_record = check.not_none( + summary_record.last_completed_check_execution_record + ) + latest_record_any_status = check.not_none(summary_record.last_check_execution_record) + latest_completed_evaluation = check.not_none( + check.not_none(latest_completed_record.event).asset_check_evaluation + ) + # Case 3: The check is currently evaluating and the previous evaluation passed. + # - If the check is overdue and the current evaluation was not kicked off by the sensor, we should kick off another evaluation. + # - If the check previously failed, we'll wait for the current evaluation to complete to avoid over-evaluation. 
+ if ( + latest_record_any_status.status == AssetCheckExecutionRecordStatus.PLANNED + and latest_completed_evaluation.passed + ): + run_record = context.instance.get_run_record_by_id(latest_record_any_status.run_id) + # Previous run was kicked off by the sensor, wait for it to complete before kicking off another run. + if ( + run_record + and run_record.dagster_run.tags.get(SENSOR_NAME_TAG) == context.sensor_name + ): + context.log.info( + f"Freshness check {check_key.to_user_string()} is currently evaluating, and was kicked off by this sensor. Skipping..." + ) + yield check_key, False + continue + # Previous run was not kicked off by the sensor, check if it's overdue. + next_deadline = cast( + float, latest_completed_evaluation.metadata[FRESH_UNTIL_METADATA_KEY].value ) - yield check_key, True - continue + if next_deadline < start_time.timestamp(): + context.log.info( + f"Freshness check {check_key.to_user_string()} is currently evaluating, but " + "enough time has passed that it can be overdue again. Adding to run request." + ) + yield check_key, True + continue + else: + how_long_until_next_deadline = next_deadline - start_time.timestamp() + context.log.info( + f"Freshness check {check_key.to_user_string()} is currently evaluating, but " + f"cannot be overdue again until {seconds_in_words(how_long_until_next_deadline)} from now. Skipping..." + ) + yield check_key, False + continue + # Case 4: The previous completed evaluation failed. We should kick off another only if the asset has been updated since it's last evaluation. + elif not latest_completed_evaluation.passed: + latest_materialization = context.instance.get_latest_materialization_event( + check_key.asset_key + ) + # If the asset has been updated since the last evaluation, we should re-evaluate the check. + if ( + latest_materialization + and latest_materialization.timestamp + > check.not_none(latest_completed_record.event).timestamp + ): + context.log.info( + f"Freshness check {check_key.to_user_string()} previously failed, but " + "the asset has been updated since the last evaluation. Adding to run request." + ) + yield check_key, True + continue + else: + context.log.info( + f"Freshness check {check_key.to_user_string()} previously failed, but " + "the asset has not been updated since the last evaluation. Skipping..." + ) + yield check_key, False + continue + # Case 5: The previous evaluation passed and there is no in progress evaluation. We should kick off another evaluation only if the check is overdue. else: - how_long_until_next_deadline = next_deadline - start_time.timestamp() - context.log.info( - f"Freshness check {check_key.to_user_string()} previously passed, but " - f"cannot be overdue again until {seconds_in_words(how_long_until_next_deadline)} from now. Skipping..." + next_deadline = cast( + float, latest_completed_evaluation.metadata[FRESH_UNTIL_METADATA_KEY].value ) - yield check_key, False - continue + if next_deadline < start_time.timestamp(): + context.log.info( + f"Freshness check {check_key.to_user_string()} previously passed, but " + "enough time has passed that it can be overdue again. Adding to run request." + ) + yield check_key, True + continue + else: + how_long_until_next_deadline = next_deadline - start_time.timestamp() + context.log.info( + f"Freshness check {check_key.to_user_string()} previously passed, but " + f"cannot be overdue again until {seconds_in_words(how_long_until_next_deadline)} from now. Skipping..." 
+ ) + yield check_key, False + continue diff --git a/python_modules/dagster/dagster/_core/definitions/assets.py b/python_modules/dagster/dagster/_core/definitions/assets.py index d616fb0361f8d..064507025685d 100644 --- a/python_modules/dagster/dagster/_core/definitions/assets.py +++ b/python_modules/dagster/dagster/_core/definitions/assets.py @@ -74,7 +74,7 @@ ) from dagster._core.errors import DagsterInvalidDefinitionError, DagsterInvariantViolationError from dagster._utils import IHasInternalInit -from dagster._utils.merger import merge_dicts +from dagster._utils.merger import merge_dicts, reverse_dict from dagster._utils.security import non_secure_md5_hash_str from dagster._utils.tags import normalize_tags from dagster._utils.warnings import ExperimentalWarning, disable_dagster_warnings @@ -85,6 +85,10 @@ ASSET_SUBSET_INPUT_PREFIX = "__subset_input__" +def stringify_asset_key_to_input_name(asset_key: AssetKey) -> str: + return "_".join(asset_key.path).replace("-", "_") + + class AssetsDefinition(ResourceAddable, IHasInternalInit): """Defines a set of assets that are produced by the same op or graph. @@ -932,6 +936,10 @@ def node_keys_by_input_name(self) -> Mapping[str, AssetKey]: """AssetKey for each input on the underlying NodeDefinition.""" return self._computation.keys_by_input_name if self._computation else {} + @property + def input_names_by_node_key(self) -> Mapping[AssetKey, str]: + return {key: input_name for input_name, key in self.node_keys_by_input_name.items()} + @property def node_check_specs_by_output_name(self) -> Mapping[str, AssetCheckSpec]: """AssetCheckSpec for each output on the underlying NodeDefinition.""" @@ -1293,20 +1301,10 @@ def map_asset_specs(self, fn: Callable[[AssetSpec], AssetSpec]) -> "AssetsDefini f"Asset key {spec.key.to_user_string()} was changed to " f"{mapped_spec.key.to_user_string()}. Mapping function must not change keys." ) - if ( - # check reference equality first for performance - mapped_spec.deps is not spec.deps and mapped_spec.deps != spec.deps - ): - raise DagsterInvalidDefinitionError( - f"Asset deps {spec.deps} were changed to {mapped_spec.deps}. Mapping function " - "must not change deps." - ) mapped_specs.append(mapped_spec) - return self.__class__.dagster_internal_init( - **{**self.get_attributes_dict(), "specs": mapped_specs} - ) + return replace_specs_on_asset(self, mapped_specs) def subset_for( self, @@ -1897,3 +1895,64 @@ def unique_id_from_asset_and_check_keys(entity_keys: Iterable["EntityKey"]) -> s """ sorted_key_strs = sorted(str(key) for key in entity_keys) return non_secure_md5_hash_str(json.dumps(sorted_key_strs).encode("utf-8"))[:8] + + +def replace_specs_on_asset( + assets_def: AssetsDefinition, replaced_specs: Sequence[AssetSpec] +) -> "AssetsDefinition": + from dagster._builtins import Nothing + from dagster._core.definitions.input import In + + new_deps = set().union(*(spec.deps for spec in replaced_specs)) + previous_deps = set().union(*(spec.deps for spec in assets_def.specs)) + added_deps = new_deps - previous_deps + removed_deps = previous_deps - new_deps + remaining_original_deps = previous_deps - removed_deps + original_key_to_input_mapping = reverse_dict(assets_def.node_keys_by_input_name) + + # If there are no changes to the dependency structure, we don't need to make any changes to the underlying node. 
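# A minimal illustrative sketch (hypothetical asset and keys; not part of this patch) of the two paths handled below:
# specs whose deps are unchanged keep the underlying node as-is, while a newly added dep such as
# AssetKey(["new-upstream"]) causes the op to be rebuilt with a Nothing-typed input named "new_upstream"
# (via stringify_asset_key_to_input_name above), e.g.:
#
#     updated = replace_specs_on_asset(
#         my_assets_def,  # hypothetical op-backed AssetsDefinition
#         [spec.replace_attributes(deps=[*spec.deps, "new-upstream"]) for spec in my_assets_def.specs],
#     )
#     assert "new_upstream" in updated.op.input_dict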
+ if not assets_def.is_executable or (not added_deps and not removed_deps): + return assets_def.__class__.dagster_internal_init( + **{**assets_def.get_attributes_dict(), "specs": replaced_specs} + ) + + # Otherwise, there are changes to the dependency structure. We need to update the node_def. + # Graph-backed assets do not currently support non-argument-based deps. Every argument to a graph-backed asset + must map to an input on an internal asset node in the graph structure. + # IMPROVEME BUILD-529 + check.invariant( + isinstance(assets_def.node_def, OpDefinition), + "Can only add additional deps to an op-backed asset.", + ) + # for each deleted dep, we need to make sure it is not an argument-based dep. Argument-based deps cannot be removed. + for dep in removed_deps: + input_name = original_key_to_input_mapping[dep.asset_key] + input_def = assets_def.node_def.input_def_named(input_name) + check.invariant( + input_def.dagster_type.is_nothing, + f"Attempted to remove argument-backed dependency {dep.asset_key} (mapped to argument {input_name}) from the asset. Only non-argument dependencies can be changed or removed using map_asset_specs.", + ) + + remaining_original_deps_by_key = {dep.asset_key: dep for dep in remaining_original_deps} + remaining_ins = { + input_name: the_in + for input_name, the_in in assets_def.node_def.input_dict.items() + if assets_def.node_keys_by_input_name[input_name] in remaining_original_deps_by_key + } + all_ins = merge_dicts( + remaining_ins, + { + stringify_asset_key_to_input_name(dep.asset_key): In(dagster_type=Nothing) + for dep in new_deps + }, + ) + + return assets_def.__class__.dagster_internal_init( + **{ + **assets_def.get_attributes_dict(), + "node_def": assets_def.op.with_replaced_properties( + name=assets_def.op.name, ins=all_ins + ), + "specs": replaced_specs, + } + ) diff --git a/python_modules/dagster/dagster/_core/definitions/decorators/decorator_assets_definition_builder.py b/python_modules/dagster/dagster/_core/definitions/decorators/decorator_assets_definition_builder.py index db4b32a374dbe..0eab760bc93e7 100644 --- a/python_modules/dagster/dagster/_core/definitions/decorators/decorator_assets_definition_builder.py +++ b/python_modules/dagster/dagster/_core/definitions/decorators/decorator_assets_definition_builder.py @@ -34,6 +34,7 @@ ASSET_SUBSET_INPUT_PREFIX, AssetsDefinition, get_partition_mappings_from_deps, + stringify_asset_key_to_input_name, ) from dagster._core.definitions.backfill_policy import BackfillPolicy from dagster._core.definitions.decorators.op_decorator import _Op @@ -55,10 +56,6 @@ ) -def stringify_asset_key_to_input_name(asset_key: AssetKey) -> str: - return "_".join(asset_key.path).replace("-", "_") - - def get_function_params_without_context_or_config_or_resources( fn: Callable[..., Any], ) -> List[Parameter]: diff --git a/python_modules/dagster/dagster/_core/definitions/op_definition.py b/python_modules/dagster/dagster/_core/definitions/op_definition.py index 58bdfcb9b1c2f..c70a520409654 100644 --- a/python_modules/dagster/dagster/_core/definitions/op_definition.py +++ b/python_modules/dagster/dagster/_core/definitions/op_definition.py @@ -367,12 +367,14 @@ def with_replaced_properties( ) -> "OpDefinition": return OpDefinition.dagster_internal_init( name=name, - ins=ins - or {input_def.name: In.from_definition(input_def) for input_def in self.input_defs}, - outs=outs - or { + ins={input_def.name: In.from_definition(input_def) for input_def in self.input_defs} + if ins is None + else ins, + outs={ output_def.name:
Out.from_definition(output_def) for output_def in self.output_defs - }, + } + if outs is None + else outs, compute_fn=self.compute_fn, config_schema=config_schema or self.config_schema, description=description or self.description, diff --git a/python_modules/dagster/dagster/_core/definitions/run_status_sensor_definition.py b/python_modules/dagster/dagster/_core/definitions/run_status_sensor_definition.py index 05bc928f2b084..984881085ed08 100644 --- a/python_modules/dagster/dagster/_core/definitions/run_status_sensor_definition.py +++ b/python_modules/dagster/dagster/_core/definitions/run_status_sensor_definition.py @@ -766,6 +766,42 @@ def _wrapped_fn( ascending=True, limit=fetch_limit, ).records + elif ( + context.instance.event_log_storage.supports_run_status_change_job_name_filter + and monitored_jobs + and all( + [ + not isinstance(monitored, (RepositorySelector, CodeLocationSelector)) + for monitored in monitored_jobs + ] + ) + ): + # the event log storage supports run status change selectors... we should construct + # the appropriate job selectors so that we can filter the events by jobs in the + # storage layer instead of in memory. This should improve throughput since we will + # avoid fetching events that we will filter out later on. + job_names = _job_names_for_monitored( + cast( + Sequence[ + Union[ + JobDefinition, + GraphDefinition, + UnresolvedAssetJobDefinition, + "JobSelector", + ] + ], + monitored_jobs, + ) + ) + event_records = context.instance.fetch_run_status_changes( + records_filter=RunStatusChangeRecordsFilter( + event_type=cast(RunStatusChangeEventType, event_type), + after_storage_id=sensor_cursor.record_id, + job_names=job_names, + ), + ascending=True, + limit=fetch_limit, + ).records else: # the cursor storage id is globally unique, either because the event log storage is # not run sharded or because the cursor was set from an event returned from the @@ -1126,3 +1162,24 @@ def inner( ) return inner + + +def _job_names_for_monitored( + monitored: Sequence[ + Union[ + JobDefinition, + GraphDefinition, + UnresolvedAssetJobDefinition, + "JobSelector", + ] + ], +) -> Sequence[str]: + from dagster._core.definitions.selector import JobSelector + + job_names = [] + for m in monitored: + if isinstance(m, JobSelector): + job_names.append(m.job_name) + else: + job_names.append(m.name) + return job_names diff --git a/python_modules/dagster/dagster/_core/event_api.py b/python_modules/dagster/dagster/_core/event_api.py index 2590b525dc64b..65d1eac18ae31 100644 --- a/python_modules/dagster/dagster/_core/event_api.py +++ b/python_modules/dagster/dagster/_core/event_api.py @@ -349,6 +349,7 @@ class RunStatusChangeRecordsFilter( ("after_storage_id", PublicAttr[Optional[int]]), ("before_storage_id", PublicAttr[Optional[int]]), ("storage_ids", PublicAttr[Optional[Sequence[int]]]), + ("job_names", Optional[Sequence[str]]), ], ) ): @@ -376,6 +377,7 @@ def __new__( after_storage_id: Optional[int] = None, before_storage_id: Optional[int] = None, storage_ids: Optional[Sequence[int]] = None, + job_names: Optional[Sequence[str]] = None, ): if event_type not in EVENT_TYPE_TO_PIPELINE_RUN_STATUS: check.failed("Invalid event type for run status change event filter") @@ -388,9 +390,10 @@ def __new__( after_storage_id=check.opt_int_param(after_storage_id, "after_storage_id"), before_storage_id=check.opt_int_param(before_storage_id, "before_storage_id"), storage_ids=check.opt_nullable_sequence_param(storage_ids, "storage_ids", of_type=int), + 
job_names=check.opt_nullable_sequence_param(job_names, "job_names", of_type=str), ) - def to_event_records_filter( + def to_event_records_filter_without_job_names( self, cursor: Optional[str] = None, ascending: bool = False ) -> EventRecordsFilter: before_cursor_storage_id, after_cursor_storage_id = EventRecordsFilter.get_cursor_params( diff --git a/python_modules/dagster/dagster/_core/execution/asset_backfill.py b/python_modules/dagster/dagster/_core/execution/asset_backfill.py index 590ad6f7ecb82..5d33dc5246b6c 100644 --- a/python_modules/dagster/dagster/_core/execution/asset_backfill.py +++ b/python_modules/dagster/dagster/_core/execution/asset_backfill.py @@ -1605,6 +1605,8 @@ def can_run_with_parent( candidate.asset_key, parent_asset_key=parent.asset_key ) + is_self_dependency = parent.asset_key == candidate.asset_key + parent_node = asset_graph.get(parent.asset_key) candidate_node = asset_graph.get(candidate.asset_key) # checks if there is a simple partition mapping between the parent and the child @@ -1666,8 +1668,11 @@ def can_run_with_parent( or parent_node.backfill_policy.max_partitions_per_run > len(asset_partitions_to_request_map[parent.asset_key]) ) - # all targeted parents are being requested this tick - and len(asset_partitions_to_request_map[parent.asset_key]) == parent_target_subset.size + # all targeted parents are being requested this tick, or its a self dependency + and ( + len(asset_partitions_to_request_map[parent.asset_key]) == parent_target_subset.size + or is_self_dependency + ) ) ): return True, "" diff --git a/python_modules/dagster/dagster/_core/execution/retries.py b/python_modules/dagster/dagster/_core/execution/retries.py index 39dcf1d65d60e..342069dd6ad1f 100644 --- a/python_modules/dagster/dagster/_core/execution/retries.py +++ b/python_modules/dagster/dagster/_core/execution/retries.py @@ -9,15 +9,12 @@ _check as check, ) from dagster._core.storage.dagster_run import DagsterRun, DagsterRunStatus -from dagster._core.storage.tags import ( - MAX_RETRIES_TAG, - RETRY_ON_ASSET_OR_OP_FAILURE_TAG, - RUN_FAILURE_REASON_TAG, -) +from dagster._core.storage.tags import MAX_RETRIES_TAG, RETRY_ON_ASSET_OR_OP_FAILURE_TAG from dagster._serdes.serdes import whitelist_for_serdes from dagster._utils.tags import get_boolean_tag_value if TYPE_CHECKING: + from dagster._core.events import RunFailureReason from dagster._core.instance import DagsterInstance @@ -82,7 +79,9 @@ def snapshot_attempts(self) -> Mapping[str, int]: return dict(self._attempts) -def auto_reexecution_should_retry_run(instance: "DagsterInstance", run: DagsterRun): +def auto_reexecution_should_retry_run( + instance: "DagsterInstance", run: DagsterRun, run_failure_reason: Optional["RunFailureReason"] +): """Determines if a run will be retried by the automatic reexcution system. A run will retry if: - it is failed. 
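# Illustrative sketch (hypothetical caller; not part of this patch): with the new signature, the failure
# reason is parsed from the run's tags by the caller and threaded through, rather than read from the
# RUN_FAILURE_REASON_TAG inside this function, e.g.:
#
#     from dagster._core.events import RunFailureReason
#     from dagster._core.storage.tags import RUN_FAILURE_REASON_TAG
#
#     reason_tag = run.tags.get(RUN_FAILURE_REASON_TAG)
#     failure_reason = RunFailureReason(reason_tag) if reason_tag else None
#     will_retry = auto_reexecution_should_retry_run(instance, run, failure_reason)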
@@ -120,6 +119,13 @@ def auto_reexecution_should_retry_run(instance: "DagsterInstance", run: DagsterR if run.status != DagsterRunStatus.FAILURE: return False + retry_on_asset_or_op_failure = get_boolean_tag_value( + run.tags.get(RETRY_ON_ASSET_OR_OP_FAILURE_TAG), + default_value=instance.run_retries_retry_on_asset_or_op_failure, + ) + if run_failure_reason == RunFailureReason.STEP_FAILURE and not retry_on_asset_or_op_failure: + return False + raw_max_retries_tag = run.tags.get(MAX_RETRIES_TAG) if raw_max_retries_tag is None: max_retries = instance.run_retries_max_retries @@ -136,15 +142,5 @@ def auto_reexecution_should_retry_run(instance: "DagsterInstance", run: DagsterR # since the original run is in the run group, the number of retries launched # so far is len(run_group_iter) - 1 if len(list(run_group_iter)) <= max_retries: - retry_on_asset_or_op_failure = get_boolean_tag_value( - run.tags.get(RETRY_ON_ASSET_OR_OP_FAILURE_TAG), - default_value=instance.run_retries_retry_on_asset_or_op_failure, - ) - if ( - run.tags.get(RUN_FAILURE_REASON_TAG) == RunFailureReason.STEP_FAILURE.value - and not retry_on_asset_or_op_failure - ): - return False - else: - return True + return True return False diff --git a/python_modules/dagster/dagster/_core/execution/stats.py b/python_modules/dagster/dagster/_core/execution/stats.py index 1175951de2eb6..7ae0756838f5d 100644 --- a/python_modules/dagster/dagster/_core/execution/stats.py +++ b/python_modules/dagster/dagster/_core/execution/stats.py @@ -4,11 +4,24 @@ import dagster._check as check from dagster._core.definitions import ExpectationResult -from dagster._core.events import MARKER_EVENTS, DagsterEventType, StepExpectationResultData +from dagster._core.events import ( + MARKER_EVENTS, + PIPELINE_EVENTS, + DagsterEventType, + StepExpectationResultData, +) from dagster._core.events.log import EventLogEntry from dagster._core.storage.dagster_run import DagsterRunStatsSnapshot from dagster._serdes import whitelist_for_serdes +RUN_STATS_EVENT_TYPES = { + *PIPELINE_EVENTS, + DagsterEventType.STEP_FAILURE, + DagsterEventType.STEP_SUCCESS, + DagsterEventType.ASSET_MATERIALIZATION, + DagsterEventType.STEP_EXPECTATION_RESULT, +} + STEP_STATS_EVENT_TYPES = { DagsterEventType.STEP_START, DagsterEventType.STEP_FAILURE, diff --git a/python_modules/dagster/dagster/_core/instance/__init__.py b/python_modules/dagster/dagster/_core/instance/__init__.py index fb42ffe3b8c2a..25d251bc30679 100644 --- a/python_modules/dagster/dagster/_core/instance/__init__.py +++ b/python_modules/dagster/dagster/_core/instance/__init__.py @@ -77,6 +77,7 @@ PARTITION_NAME_TAG, RESUME_RETRY_TAG, ROOT_RUN_ID_TAG, + RUN_FAILURE_REASON_TAG, TAGS_TO_MAYBE_OMIT_ON_RETRY, WILL_RETRY_TAG, ) @@ -2443,6 +2444,8 @@ def handle_new_event( event (EventLogEntry): The event to handle. batch_metadata (Optional[DagsterEventBatchMetadata]): Metadata for batch writing. """ + from dagster._core.events import RunFailureReason + if batch_metadata is None or not _is_batch_writing_enabled(): events = [event] else: @@ -2484,9 +2487,18 @@ def handle_new_event( if run and event.get_dagster_event().is_run_failure and self.run_retries_enabled: # Note that this tag is only applied to runs that fail. Successful runs will not # have a WILL_RETRY_TAG tag. 
+ run_failure_reason = ( + RunFailureReason(run.tags.get(RUN_FAILURE_REASON_TAG)) + if run.tags.get(RUN_FAILURE_REASON_TAG) + else None + ) self.add_run_tags( run_id, - {WILL_RETRY_TAG: str(auto_reexecution_should_retry_run(self, run)).lower()}, + { + WILL_RETRY_TAG: str( + auto_reexecution_should_retry_run(self, run, run_failure_reason) + ).lower() + }, ) for sub in self._subscribers[run_id]: sub(event) diff --git a/python_modules/dagster/dagster/_core/pipes/context.py b/python_modules/dagster/dagster/_core/pipes/context.py index a0533ce3379f9..b5d82e9454927 100644 --- a/python_modules/dagster/dagster/_core/pipes/context.py +++ b/python_modules/dagster/dagster/_core/pipes/context.py @@ -132,9 +132,8 @@ def _resolve_metadata( k: self._resolve_metadata_value(v["raw_value"], v["type"]) for k, v in metadata.items() } - def _resolve_metadata_value( - self, value: Any, metadata_type: PipesMetadataType - ) -> MetadataValue: + @staticmethod + def _resolve_metadata_value(value: Any, metadata_type: PipesMetadataType) -> MetadataValue: if metadata_type == PIPES_METADATA_TYPE_INFER: return normalize_metadata_value(value) elif metadata_type == "text": diff --git a/python_modules/dagster/dagster/_core/pipes/utils.py b/python_modules/dagster/dagster/_core/pipes/utils.py index add8889626ca5..e161c6f2e3bba 100644 --- a/python_modules/dagster/dagster/_core/pipes/utils.py +++ b/python_modules/dagster/dagster/_core/pipes/utils.py @@ -134,13 +134,18 @@ class PipesFileMessageReader(PipesMessageReader): Args: path (str): The path of the file to which messages will be written. The file will be deleted on close of the pipes session. + include_stdio_in_messages (bool): Whether to include stdout/stderr logs in the messages produced by the message writer in the external process. + cleanup_file (bool): Whether to delete the file on close of the pipes session. 
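For example (illustrative, hypothetical path), a reader that keeps its file around for debugging could be constructed as PipesFileMessageReader("/tmp/pipes_messages", cleanup_file=False).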
""" - def __init__(self, path: str, include_stdio_in_messages: bool = False): + def __init__( + self, path: str, include_stdio_in_messages: bool = False, cleanup_file: bool = True + ): self._path = check.str_param(path, "path") self._include_stdio_in_messages = check.bool_param( include_stdio_in_messages, "include_stdio_in_messages" ) + self._cleanup_file = cleanup_file def on_launched(self, params: PipesLaunchedData) -> None: self.launched_payload = params @@ -178,7 +183,7 @@ def read_messages( is_session_closed.set() if thread: thread.join() - if os.path.exists(self._path): + if os.path.exists(self._path) and self._cleanup_file: os.remove(self._path) def _reader_thread(self, handler: "PipesMessageHandler", is_resource_complete: Event) -> None: diff --git a/python_modules/dagster/dagster/_core/remote_representation/external_data.py b/python_modules/dagster/dagster/_core/remote_representation/external_data.py index 3afe97064d9af..637ac03ca6e9d 100644 --- a/python_modules/dagster/dagster/_core/remote_representation/external_data.py +++ b/python_modules/dagster/dagster/_core/remote_representation/external_data.py @@ -6,6 +6,7 @@ import inspect import json +import os from abc import ABC, abstractmethod from collections import defaultdict from enum import Enum @@ -102,7 +103,11 @@ from dagster._core.utils import is_valid_email from dagster._record import IHaveNew, record, record_custom from dagster._serdes import whitelist_for_serdes -from dagster._serdes.serdes import FieldSerializer, is_whitelisted_for_serdes_object +from dagster._serdes.serdes import ( + FieldSerializer, + get_prefix_for_a_serialized, + is_whitelisted_for_serdes_object, +) from dagster._time import datetime_from_timestamp from dagster._utils.error import SerializableErrorInfo from dagster._utils.warnings import suppress_dagster_warnings @@ -388,10 +393,13 @@ def __new__( ) +_JOB_SNAP_STORAGE_FIELD = "pipeline_snapshot" + + @whitelist_for_serdes( storage_name="ExternalPipelineData", storage_field_names={ - "job": "pipeline_snapshot", + "job": _JOB_SNAP_STORAGE_FIELD, "parent_job": "parent_pipeline_snapshot", }, # There was a period during which `JobDefinition` was a newer subclass of the legacy @@ -1859,3 +1867,44 @@ def resolve_automation_condition_args( else: # for non-serializable conditions, only include the snapshot return None, automation_condition.get_snapshot() + + +def _extract_fast(serialized_job_data: str): + target_key = f'"{_JOB_SNAP_STORAGE_FIELD}": ' + target_substr = target_key + get_prefix_for_a_serialized(JobSnap) + # look for key: type + idx = serialized_job_data.find(target_substr) + check.invariant(idx > 0) + # slice starting after key: + start_idx = idx + len(target_key) + + # trim outer object } + # assumption that pipeline_snapshot is last field under test in test_job_data_snap_layout + serialized_job_snap = serialized_job_data[start_idx:-1] + check.invariant(serialized_job_snap[0] == "{" and serialized_job_snap[-1] == "}") + + return serialized_job_snap + + +def _extract_safe(serialized_job_data: str): + # Intentionally use json directly instead of serdes to avoid losing information if the current process + # is older than the source process. 
+ return json.dumps(json.loads(serialized_job_data)[_JOB_SNAP_STORAGE_FIELD]) + + +DISABLE_FAST_EXTRACT_ENV_VAR = "DAGSTER_DISABLE_JOB_SNAP_FAST_EXTRACT" + + +def extract_serialized_job_snap_from_serialized_job_data_snap(serialized_job_data_snap: str): + # utility used by DagsterCloudAgent to extract JobSnap out of JobDataSnap + # efficiently and safely + if not serialized_job_data_snap.startswith(get_prefix_for_a_serialized(JobDataSnap)): + raise Exception("Passed in string does not meet expectations for a serialized JobDataSnap") + + if not os.getenv(DISABLE_FAST_EXTRACT_ENV_VAR): + try: + return _extract_fast(serialized_job_data_snap) + except Exception: + pass + + return _extract_safe(serialized_job_data_snap) diff --git a/python_modules/dagster/dagster/_core/snap/dep_snapshot.py b/python_modules/dagster/dagster/_core/snap/dep_snapshot.py index d5508723810ba..8174f35afc17c 100644 --- a/python_modules/dagster/dagster/_core/snap/dep_snapshot.py +++ b/python_modules/dagster/dagster/_core/snap/dep_snapshot.py @@ -1,4 +1,5 @@ from collections import defaultdict +from functools import cached_property from typing import DefaultDict, Dict, List, Mapping, NamedTuple, Sequence import dagster._check as check @@ -140,11 +141,7 @@ def get_upstream_outputs(self, node_name: str, input_name: str) -> Sequence["Out check.str_param(node_name, "node_name") check.str_param(input_name, "input_name") - for input_dep_snap in self.get_invocation(node_name).input_dep_snaps: - if input_dep_snap.input_name == input_name: - return input_dep_snap.upstream_output_snaps - - check.failed(f"Input {input_name} not found for node {node_name}") + return self.get_invocation(node_name).input_dep_snap(input_name).upstream_output_snaps def get_upstream_output(self, node_name: str, input_name: str) -> "OutputHandleSnap": check.str_param(node_name, "node_name") @@ -235,9 +232,13 @@ def __new__( is_dynamic_mapped=check.bool_param(is_dynamic_mapped, "is_dynamic_mapped"), ) + @cached_property + def input_dep_map(self) -> Mapping[str, InputDependencySnap]: + return {inp_snap.input_name: inp_snap for inp_snap in self.input_dep_snaps} + def input_dep_snap(self, input_name: str) -> InputDependencySnap: - for inp_snap in self.input_dep_snaps: - if inp_snap.input_name == input_name: - return inp_snap + inp_snap = self.input_dep_map.get(input_name) + if inp_snap: + return inp_snap - check.failed(f"No input found named {input_name}") + check.failed(f"Input {input_name} not found for node {self.node_name}") diff --git a/python_modules/dagster/dagster/_core/snap/node.py b/python_modules/dagster/dagster/_core/snap/node.py index e05f6d972411a..e56fe51fc32dd 100644 --- a/python_modules/dagster/dagster/_core/snap/node.py +++ b/python_modules/dagster/dagster/_core/snap/node.py @@ -1,4 +1,5 @@ -from typing import Mapping, NamedTuple, Optional, Sequence, Union +from functools import cached_property +from typing import Mapping, Optional, Sequence, Union import dagster._check as check from dagster._config import ConfigFieldSnap, snap_from_field @@ -20,6 +21,7 @@ DependencyStructureSnapshot, build_dep_structure_snapshot_from_graph_def, ) +from dagster._record import IHaveNew, record, record_custom from dagster._serdes import whitelist_for_serdes from dagster._utils.warnings import suppress_dagster_warnings @@ -29,17 +31,13 @@ field_serializers={"metadata": MetadataFieldSerializer}, skip_when_empty_fields={"metadata"}, ) -class InputDefSnap( - NamedTuple( - "_InputDefSnap", - [ - ("name", str), - ("dagster_type_key", str), - ("description", 
Optional[str]), - ("metadata", Mapping[str, MetadataValue]), - ], - ) -): +@record_custom +class InputDefSnap(IHaveNew): + name: str + dagster_type_key: str + description: Optional[str] + metadata: Mapping[str, MetadataValue] + def __new__( cls, name: str, @@ -47,11 +45,11 @@ def __new__( description: Optional[str], metadata: Optional[Mapping[str, MetadataValue]] = None, ): - return super(InputDefSnap, cls).__new__( + return super().__new__( cls, - name=check.str_param(name, "name"), - dagster_type_key=check.str_param(dagster_type_key, "dagster_type_key"), - description=check.opt_str_param(description, "description"), + name=name, + dagster_type_key=dagster_type_key, + description=description, metadata=normalize_metadata( check.opt_mapping_param(metadata, "metadata", key_type=str), allow_invalid=True ), @@ -63,19 +61,15 @@ def __new__( field_serializers={"metadata": MetadataFieldSerializer}, skip_when_empty_fields={"metadata"}, ) -class OutputDefSnap( - NamedTuple( - "_OutputDefSnap", - [ - ("name", str), - ("dagster_type_key", str), - ("description", Optional[str]), - ("is_required", bool), - ("metadata", Mapping[str, MetadataValue]), - ("is_dynamic", bool), - ], - ) -): +@record_custom +class OutputDefSnap(IHaveNew): + name: str + dagster_type_key: str + description: Optional[str] + is_required: bool + metadata: Mapping[str, MetadataValue] + is_dynamic: bool + def __new__( cls, name: str, @@ -85,42 +79,25 @@ def __new__( metadata: Optional[Mapping[str, MetadataValue]] = None, is_dynamic: bool = False, ): - return super(OutputDefSnap, cls).__new__( + return super().__new__( cls, - name=check.str_param(name, "name"), - dagster_type_key=check.str_param(dagster_type_key, "dagster_type_key"), - description=check.opt_str_param(description, "description"), - is_required=check.bool_param(is_required, "is_required"), + name=name, + dagster_type_key=dagster_type_key, + description=description, + is_required=is_required, metadata=normalize_metadata( check.opt_mapping_param(metadata, "metadata", key_type=str), allow_invalid=True ), - is_dynamic=check.bool_param(is_dynamic, "is_dynamic"), + is_dynamic=is_dynamic, ) @whitelist_for_serdes(storage_field_names={"mapped_node_name": "mapped_solid_name"}) -class OutputMappingSnap( - NamedTuple( - "_OutputMappingSnap", - [ - ("mapped_node_name", str), - ("mapped_output_name", str), - ("external_output_name", str), - ], - ) -): - def __new__( - cls, - mapped_node_name: str, - mapped_output_name: str, - external_output_name: str, - ): - return super(OutputMappingSnap, cls).__new__( - cls, - mapped_node_name=check.str_param(mapped_node_name, "mapped_node_name"), - mapped_output_name=check.str_param(mapped_output_name, "mapped_output_name"), - external_output_name=check.str_param(external_output_name, "external_output_name"), - ) +@record +class OutputMappingSnap: + mapped_node_name: str + mapped_output_name: str + external_output_name: str def build_output_mapping_snap(output_mapping: OutputMapping) -> OutputMappingSnap: @@ -132,23 +109,11 @@ def build_output_mapping_snap(output_mapping: OutputMapping) -> OutputMappingSna @whitelist_for_serdes(storage_field_names={"mapped_node_name": "mapped_solid_name"}) -class InputMappingSnap( - NamedTuple( - "_InputMappingSnap", - [ - ("mapped_node_name", str), - ("mapped_input_name", str), - ("external_input_name", str), - ], - ) -): - def __new__(cls, mapped_node_name: str, mapped_input_name: str, external_input_name: str): - return super(InputMappingSnap, cls).__new__( - cls, - 
mapped_node_name=check.str_param(mapped_node_name, "mapped_node_name"), - mapped_input_name=check.str_param(mapped_input_name, "mapped_input_name"), - external_input_name=check.str_param(external_input_name, "external_input_name"), - ) +@record +class InputMappingSnap: + mapped_node_name: str + mapped_input_name: str + external_input_name: str def build_input_mapping_snap(input_mapping: InputMapping) -> InputMappingSnap: @@ -182,56 +147,25 @@ def build_output_def_snap(output_def: OutputDefinition) -> OutputDefSnap: @whitelist_for_serdes(storage_name="CompositeSolidDefSnap") -class GraphDefSnap( - NamedTuple( - "_GraphDefSnap", - [ - ("name", str), - ("input_def_snaps", Sequence[InputDefSnap]), - ("output_def_snaps", Sequence[OutputDefSnap]), - ("description", Optional[str]), - ("tags", Mapping[str, object]), - ("config_field_snap", Optional[ConfigFieldSnap]), - ("dep_structure_snapshot", DependencyStructureSnapshot), - ("input_mapping_snaps", Sequence[InputMappingSnap]), - ("output_mapping_snaps", Sequence[OutputMappingSnap]), - ], - ) -): - def __new__( - cls, - name: str, - input_def_snaps: Sequence[InputDefSnap], - output_def_snaps: Sequence[OutputDefSnap], - description: Optional[str], - tags: Mapping[str, str], - config_field_snap: Optional[ConfigFieldSnap], - dep_structure_snapshot: DependencyStructureSnapshot, - input_mapping_snaps: Sequence[InputMappingSnap], - output_mapping_snaps: Sequence[OutputMappingSnap], - ): - return super(GraphDefSnap, cls).__new__( - cls, - dep_structure_snapshot=check.inst_param( - dep_structure_snapshot, "dep_structure_snapshot", DependencyStructureSnapshot - ), - input_mapping_snaps=check.sequence_param( - input_mapping_snaps, "input_mapping_snaps", of_type=InputMappingSnap - ), - output_mapping_snaps=check.sequence_param( - output_mapping_snaps, "output_mapping_snaps", of_type=OutputMappingSnap - ), - name=check.str_param(name, "name"), - input_def_snaps=check.sequence_param(input_def_snaps, "input_def_snaps", InputDefSnap), - output_def_snaps=check.sequence_param( - output_def_snaps, "output_def_snaps", OutputDefSnap - ), - description=check.opt_str_param(description, "description"), - tags=check.mapping_param(tags, "tags"), - config_field_snap=check.opt_inst_param( - config_field_snap, "config_field_snap", ConfigFieldSnap - ), - ) +@record +class GraphDefSnap: + name: str + input_def_snaps: Sequence[InputDefSnap] + output_def_snaps: Sequence[OutputDefSnap] + description: Optional[str] + tags: Mapping[str, str] + config_field_snap: Optional[ConfigFieldSnap] + dep_structure_snapshot: DependencyStructureSnapshot + input_mapping_snaps: Sequence[InputMappingSnap] + output_mapping_snaps: Sequence[OutputMappingSnap] + + @cached_property + def input_def_map(self) -> Mapping[str, InputDefSnap]: + return {input_def.name: input_def for input_def in self.input_def_snaps} + + @cached_property + def output_def_map(self) -> Mapping[str, OutputDefSnap]: + return {output_def.name: output_def for output_def in self.output_def_snaps} def get_input_snap(self, name: str) -> InputDefSnap: return _get_input_snap(self, name) @@ -241,46 +175,23 @@ def get_output_snap(self, name: str) -> OutputDefSnap: @whitelist_for_serdes(storage_name="SolidDefSnap") -class OpDefSnap( - NamedTuple( - "_OpDefSnap", - [ - ("name", str), - ("input_def_snaps", Sequence[InputDefSnap]), - ("output_def_snaps", Sequence[OutputDefSnap]), - ("description", Optional[str]), - ("tags", Mapping[str, object]), - ("required_resource_keys", Sequence[str]), - ("config_field_snap", 
Optional[ConfigFieldSnap]), - ], - ) -): - def __new__( - cls, - name: str, - input_def_snaps: Sequence[InputDefSnap], - output_def_snaps: Sequence[OutputDefSnap], - description: Optional[str], - tags: Mapping[str, str], - required_resource_keys: Sequence[str], - config_field_snap: Optional[ConfigFieldSnap], - ): - return super(OpDefSnap, cls).__new__( - cls, - required_resource_keys=check.sequence_param( - required_resource_keys, "required_resource_keys", str - ), - name=check.str_param(name, "name"), - input_def_snaps=check.sequence_param(input_def_snaps, "input_def_snaps", InputDefSnap), - output_def_snaps=check.sequence_param( - output_def_snaps, "output_def_snaps", OutputDefSnap - ), - description=check.opt_str_param(description, "description"), - tags=check.mapping_param(tags, "tags"), - config_field_snap=check.opt_inst_param( - config_field_snap, "config_field_snap", ConfigFieldSnap - ), - ) +@record +class OpDefSnap: + name: str + input_def_snaps: Sequence[InputDefSnap] + output_def_snaps: Sequence[OutputDefSnap] + description: Optional[str] + tags: Mapping[str, str] + required_resource_keys: Sequence[str] + config_field_snap: Optional[ConfigFieldSnap] + + @cached_property + def input_def_map(self) -> Mapping[str, InputDefSnap]: + return {input_def.name: input_def for input_def in self.input_def_snaps} + + @cached_property + def output_def_map(self) -> Mapping[str, OutputDefSnap]: + return {output_def.name: output_def for output_def in self.output_def_snaps} def get_input_snap(self, name: str) -> InputDefSnap: return _get_input_snap(self, name) @@ -296,35 +207,10 @@ def get_output_snap(self, name: str) -> OutputDefSnap: "graph_def_snaps": "composite_solid_def_snaps", }, ) -class NodeDefsSnapshot( - NamedTuple( - "_NodeDefsSnapshot", - [ - ("op_def_snaps", Sequence[OpDefSnap]), - ("graph_def_snaps", Sequence[GraphDefSnap]), - ], - ) -): - def __new__( - cls, - op_def_snaps: Sequence[OpDefSnap], - graph_def_snaps: Sequence[GraphDefSnap], - ): - return super(NodeDefsSnapshot, cls).__new__( - cls, - op_def_snaps=sorted( - check.sequence_param(op_def_snaps, "op_def_snaps", of_type=OpDefSnap), - key=lambda op_def: op_def.name, - ), - graph_def_snaps=sorted( - check.sequence_param( - graph_def_snaps, - "graph_def_snaps", - of_type=GraphDefSnap, - ), - key=lambda graph_def: graph_def.name, - ), - ) +@record +class NodeDefsSnapshot(IHaveNew): + op_def_snaps: Sequence[OpDefSnap] + graph_def_snaps: Sequence[GraphDefSnap] @suppress_dagster_warnings @@ -341,8 +227,14 @@ def build_node_defs_snapshot(job_def: JobDefinition) -> NodeDefsSnapshot: check.failed(f"Unexpected NodeDefinition type {node_def}") return NodeDefsSnapshot( - op_def_snaps=op_def_snaps, - graph_def_snaps=graph_def_snaps, + op_def_snaps=sorted( + op_def_snaps, + key=lambda op_def: op_def.name, + ), + graph_def_snaps=sorted( + graph_def_snaps, + key=lambda graph_def: graph_def.name, + ), ) @@ -387,9 +279,9 @@ def build_op_def_snap(op_def: OpDefinition) -> OpDefSnap: # shared impl for GraphDefSnap and OpDefSnap def _get_input_snap(node_def: Union[GraphDefSnap, OpDefSnap], name: str) -> InputDefSnap: check.str_param(name, "name") - for inp in node_def.input_def_snaps: - if inp.name == name: - return inp + inp = node_def.input_def_map.get(name) + if inp: + return inp check.failed(f"Could not find input {name} in op def {node_def.name}") @@ -397,8 +289,8 @@ def _get_input_snap(node_def: Union[GraphDefSnap, OpDefSnap], name: str) -> Inpu # shared impl for GraphDefSnap and OpDefSnap def _get_output_snap(node_def: Union[GraphDefSnap, 
OpDefSnap], name: str) -> OutputDefSnap: check.str_param(name, "name") - for out in node_def.output_def_snaps: - if out.name == name: - return out + inp = node_def.output_def_map.get(name) + if inp: + return inp check.failed(f"Could not find output {name} in node def {node_def.name}") diff --git a/python_modules/dagster/dagster/_core/storage/asset_check_execution_record.py b/python_modules/dagster/dagster/_core/storage/asset_check_execution_record.py index 32f9d68a48a6f..8b233099eae33 100644 --- a/python_modules/dagster/dagster/_core/storage/asset_check_execution_record.py +++ b/python_modules/dagster/dagster/_core/storage/asset_check_execution_record.py @@ -29,6 +29,12 @@ class AssetCheckExecutionRecordStatus(enum.Enum): FAILED = "FAILED" # explicit fail result +COMPLETED_ASSET_CHECK_EXECUTION_RECORD_STATUSES = { + AssetCheckExecutionRecordStatus.SUCCEEDED, + AssetCheckExecutionRecordStatus.FAILED, +} + + class AssetCheckExecutionResolvedStatus(enum.Enum): IN_PROGRESS = "IN_PROGRESS" SUCCEEDED = "SUCCEEDED" diff --git a/python_modules/dagster/dagster/_core/storage/event_log/base.py b/python_modules/dagster/dagster/_core/storage/event_log/base.py index 96ce375465ccf..b82eae23ad458 100644 --- a/python_modules/dagster/dagster/_core/storage/event_log/base.py +++ b/python_modules/dagster/dagster/_core/storage/event_log/base.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from typing import ( TYPE_CHECKING, + AbstractSet, Iterable, Mapping, NamedTuple, @@ -28,6 +29,7 @@ ) from dagster._core.events import DagsterEventType from dagster._core.execution.stats import ( + RUN_STATS_EVENT_TYPES, STEP_STATS_EVENT_TYPES, RunStepKeyStatsSnapshot, build_run_stats_from_events, @@ -35,7 +37,10 @@ ) from dagster._core.instance import MayHaveInstanceWeakref, T_DagsterInstance from dagster._core.loader import LoadableBy, LoadingContext -from dagster._core.storage.asset_check_execution_record import AssetCheckExecutionRecord +from dagster._core.storage.asset_check_execution_record import ( + AssetCheckExecutionRecord, + AssetCheckExecutionRecordStatus, +) from dagster._core.storage.dagster_run import DagsterRunStatsSnapshot from dagster._core.storage.partition_status_cache import get_and_update_asset_status_cache_value from dagster._core.storage.sql import AlembicVersion @@ -157,6 +162,7 @@ class AssetCheckSummaryRecord( ("asset_check_key", AssetCheckKey), ("last_check_execution_record", Optional[AssetCheckExecutionRecord]), ("last_run_id", Optional[str]), + ("last_completed_check_execution_record", Optional[AssetCheckExecutionRecord]), ], ), LoadableBy[AssetCheckKey], @@ -170,6 +176,14 @@ def _blocking_batch_load( ) return [records_by_key[key] for key in keys] + @property + def last_completed_run_id(self) -> Optional[str]: + return ( + self.last_completed_check_execution_record.run_id + if self.last_completed_check_execution_record + else None + ) + class PlannedMaterializationInfo(NamedTuple): """Internal representation of an planned materialization event, containing storage_id / run_id. 
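# Illustrative sketch (hypothetical instance and check key; not part of this patch): the new field lets a
# caller distinguish the most recent attempt from the most recent completed attempt, e.g.:
#
#     record = instance.event_log_storage.get_asset_check_summary_records([check_key])[check_key]
#     latest = record.last_check_execution_record                # may still be PLANNED
#     completed = record.last_completed_check_execution_record   # SUCCEEDED or FAILED, if any
#     completed_run_id = record.last_completed_run_id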
@@ -242,7 +256,9 @@ def get_records_for_run( def get_stats_for_run(self, run_id: str) -> DagsterRunStatsSnapshot: """Get a summary of events that have ocurred in a run.""" - return build_run_stats_from_events(run_id, self.get_logs_for_run(run_id)) + return build_run_stats_from_events( + run_id, self.get_logs_for_run(run_id, of_type=RUN_STATS_EVENT_TYPES) + ) def get_step_stats_for_run( self, run_id: str, step_keys: Optional[Sequence[str]] = None @@ -600,6 +616,7 @@ def get_asset_check_execution_history( check_key: AssetCheckKey, limit: int, cursor: Optional[int] = None, + status: Optional[AbstractSet[AssetCheckExecutionRecordStatus]] = None, ) -> Sequence[AssetCheckExecutionRecord]: """Get executions for one asset check, sorted by recency.""" pass @@ -631,6 +648,10 @@ def fetch_observations( ) -> EventRecordsResult: raise NotImplementedError() + @property + def supports_run_status_change_job_name_filter(self) -> bool: + return False + @abstractmethod def fetch_run_status_changes( self, diff --git a/python_modules/dagster/dagster/_core/storage/event_log/sql_event_log.py b/python_modules/dagster/dagster/_core/storage/event_log/sql_event_log.py index 62a0463e45933..1bb1398ec7ef3 100644 --- a/python_modules/dagster/dagster/_core/storage/event_log/sql_event_log.py +++ b/python_modules/dagster/dagster/_core/storage/event_log/sql_event_log.py @@ -7,6 +7,7 @@ from functools import cached_property from typing import ( TYPE_CHECKING, + AbstractSet, Any, ContextManager, Dict, @@ -52,12 +53,17 @@ ASSET_CHECK_EVENTS, ASSET_EVENTS, EVENT_TYPE_TO_PIPELINE_RUN_STATUS, - MARKER_EVENTS, DagsterEventType, ) from dagster._core.events.log import EventLogEntry -from dagster._core.execution.stats import RunStepKeyStatsSnapshot, build_run_step_stats_from_events +from dagster._core.execution.stats import ( + RUN_STATS_EVENT_TYPES, + STEP_STATS_EVENT_TYPES, + RunStepKeyStatsSnapshot, + build_run_step_stats_from_events, +) from dagster._core.storage.asset_check_execution_record import ( + COMPLETED_ASSET_CHECK_EXECUTION_RECORD_STATUSES, AssetCheckExecutionRecord, AssetCheckExecutionRecordStatus, ) @@ -572,7 +578,9 @@ def get_stats_for_run(self, run_id: str) -> DagsterRunStatsSnapshot: .where( db.and_( SqlEventLogStorageTable.c.run_id == run_id, - SqlEventLogStorageTable.c.dagster_event_type != None, # noqa: E711 + SqlEventLogStorageTable.c.dagster_event_type.in_( + [event_type.value for event_type in RUN_STATS_EVENT_TYPES] + ), ) ) .group_by("dagster_event_type") @@ -645,18 +653,7 @@ def get_step_stats_for_run( .where(SqlEventLogStorageTable.c.step_key != None) # noqa: E711 .where( SqlEventLogStorageTable.c.dagster_event_type.in_( - [ - DagsterEventType.STEP_START.value, - DagsterEventType.STEP_SUCCESS.value, - DagsterEventType.STEP_SKIPPED.value, - DagsterEventType.STEP_FAILURE.value, - DagsterEventType.STEP_RESTARTED.value, - DagsterEventType.ASSET_MATERIALIZATION.value, - DagsterEventType.STEP_EXPECTATION_RESULT.value, - DagsterEventType.STEP_RESTARTED.value, - DagsterEventType.STEP_UP_FOR_RETRY.value, - ] - + [marker_event.value for marker_event in MARKER_EVENTS] + [event_type.value for event_type in STEP_STATS_EVENT_TYPES] ) ) .order_by(SqlEventLogStorageTable.c.id.asc()) @@ -1115,12 +1112,19 @@ def fetch_run_status_changes( before_cursor, after_cursor = EventRecordsFilter.get_cursor_params(cursor, ascending) event_records_filter = ( - records_filter.to_event_records_filter(cursor, ascending) + records_filter.to_event_records_filter_without_job_names(cursor, ascending) if isinstance(records_filter, 
RunStatusChangeRecordsFilter) else EventRecordsFilter( event_type, before_cursor=before_cursor, after_cursor=after_cursor ) ) + has_job_name_filter = ( + isinstance(records_filter, RunStatusChangeRecordsFilter) and records_filter.job_names + ) + if has_job_name_filter and not self.supports_run_status_change_job_name_filter: + check.failed( + "Called fetch_run_status_changes with selectors, which are not supported with this storage." + ) return self._get_event_records_result(event_records_filter, limit, cursor, ascending) def get_logs_for_all_runs_by_log_id( @@ -1327,11 +1331,29 @@ def get_asset_check_summary_records( ) -> Mapping[AssetCheckKey, AssetCheckSummaryRecord]: states = {} for asset_check_key in asset_check_keys: - execution_record = self.get_asset_check_execution_history(asset_check_key, limit=1) + last_execution_record = self.get_asset_check_execution_history(asset_check_key, limit=1) + last_completed_execution_record = ( + last_execution_record + # If the check has never been executed or the latest record is a completed record, + # Avoid refetching the last completed record + if ( + not last_execution_record + or last_execution_record[0].status + in COMPLETED_ASSET_CHECK_EXECUTION_RECORD_STATUSES + ) + else self.get_asset_check_execution_history( + asset_check_key, limit=1, status=COMPLETED_ASSET_CHECK_EXECUTION_RECORD_STATUSES + ) + ) states[asset_check_key] = AssetCheckSummaryRecord( asset_check_key=asset_check_key, - last_check_execution_record=execution_record[0] if execution_record else None, - last_run_id=execution_record[0].run_id if execution_record else None, + last_check_execution_record=last_execution_record[0] + if last_execution_record + else None, + last_run_id=last_execution_record[0].run_id if last_execution_record else None, + last_completed_check_execution_record=last_completed_execution_record[0] + if last_completed_execution_record + else None, ) return states @@ -2888,6 +2910,7 @@ def get_asset_check_execution_history( check_key: AssetCheckKey, limit: int, cursor: Optional[int] = None, + status: Optional[AbstractSet[AssetCheckExecutionRecordStatus]] = None, ) -> Sequence[AssetCheckExecutionRecord]: check.inst_param(check_key, "key", AssetCheckKey) check.int_param(limit, "limit") @@ -2915,6 +2938,11 @@ def get_asset_check_execution_history( if cursor: query = query.where(AssetCheckExecutionsTable.c.id < cursor) + if status: + query = query.where( + AssetCheckExecutionsTable.c.execution_status.in_([s.value for s in status]) + ) + with self.index_connection() as conn: rows = db_fetch_mappings(conn, query) diff --git a/python_modules/dagster/dagster/_core/storage/event_log/sqlite/sqlite_event_log.py b/python_modules/dagster/dagster/_core/storage/event_log/sqlite/sqlite_event_log.py index 14559adc9b396..69807391d07c9 100644 --- a/python_modules/dagster/dagster/_core/storage/event_log/sqlite/sqlite_event_log.py +++ b/python_modules/dagster/dagster/_core/storage/event_log/sqlite/sqlite_event_log.py @@ -412,7 +412,7 @@ def fetch_run_status_changes( before_cursor, after_cursor = EventRecordsFilter.get_cursor_params(cursor, ascending) event_records_filter = ( - records_filter.to_event_records_filter(cursor, ascending) + records_filter.to_event_records_filter_without_job_names(cursor, ascending) if isinstance(records_filter, RunStatusChangeRecordsFilter) else EventRecordsFilter( event_type, before_cursor=before_cursor, after_cursor=after_cursor diff --git a/python_modules/dagster/dagster/_core/storage/legacy_storage.py 
b/python_modules/dagster/dagster/_core/storage/legacy_storage.py index 8873d4546b8a1..28668cb5fb2e6 100644 --- a/python_modules/dagster/dagster/_core/storage/legacy_storage.py +++ b/python_modules/dagster/dagster/_core/storage/legacy_storage.py @@ -1,4 +1,14 @@ -from typing import TYPE_CHECKING, Iterable, Mapping, Optional, Sequence, Set, Tuple, Union +from typing import ( + TYPE_CHECKING, + AbstractSet, + Iterable, + Mapping, + Optional, + Sequence, + Set, + Tuple, + Union, +) from dagster import _check as check from dagster._config.config_schema import UserConfigSchema @@ -9,7 +19,10 @@ ) from dagster._core.definitions.events import AssetKey from dagster._core.event_api import EventHandlerFn -from dagster._core.storage.asset_check_execution_record import AssetCheckExecutionRecord +from dagster._core.storage.asset_check_execution_record import ( + AssetCheckExecutionRecord, + AssetCheckExecutionRecordStatus, +) from dagster._core.storage.base_storage import DagsterStorage from dagster._core.storage.event_log.base import ( AssetCheckSummaryRecord, @@ -687,11 +700,13 @@ def get_asset_check_execution_history( check_key: "AssetCheckKey", limit: int, cursor: Optional[int] = None, + status: Optional[AbstractSet[AssetCheckExecutionRecordStatus]] = None, ) -> Sequence[AssetCheckExecutionRecord]: return self._storage.event_log_storage.get_asset_check_execution_history( check_key=check_key, limit=limit, cursor=cursor, + status=status, ) def get_latest_asset_check_execution_by_key( diff --git a/python_modules/dagster/dagster/_core/storage/tags.py b/python_modules/dagster/dagster/_core/storage/tags.py index 4b8db616409d3..b6db99d0fe28c 100644 --- a/python_modules/dagster/dagster/_core/storage/tags.py +++ b/python_modules/dagster/dagster/_core/storage/tags.py @@ -93,7 +93,7 @@ USER_EDITABLE_SYSTEM_TAGS = [ PRIORITY_TAG, MAX_RETRIES_TAG, - RESUME_RETRY_TAG, + RETRY_STRATEGY_TAG, MAX_RUNTIME_SECONDS_TAG, RUN_ISOLATION_TAG, RETRY_ON_ASSET_OR_OP_FAILURE_TAG, @@ -115,7 +115,7 @@ *RUN_METRIC_TAGS, RUN_FAILURE_REASON_TAG, RETRY_NUMBER_TAG, - RETRY_STRATEGY_TAG, + RESUME_RETRY_TAG, WILL_RETRY_TAG, AUTO_RETRY_RUN_ID_TAG, *BACKFILL_TAGS, diff --git a/python_modules/dagster/dagster/_daemon/auto_run_reexecution/auto_run_reexecution.py b/python_modules/dagster/dagster/_daemon/auto_run_reexecution/auto_run_reexecution.py index d9a1333edb1d2..65a21b0d8a1f4 100644 --- a/python_modules/dagster/dagster/_daemon/auto_run_reexecution/auto_run_reexecution.py +++ b/python_modules/dagster/dagster/_daemon/auto_run_reexecution/auto_run_reexecution.py @@ -25,12 +25,22 @@ def should_retry(run: DagsterRun, instance: DagsterInstance) -> bool: + """A more robust method of determining if a run should be retried by the daemon than just looking + at the WILL_RETRY_TAG. We account for the case where the code version is old and doesn't set the + WILL_RETRY_TAG. If the tag wasn't set for a run failure, we set it so that other daemons can use the + WILL_RETRY_TAG to determine if the run should be retried.
+ """ will_retry_tag_value = run.tags.get(WILL_RETRY_TAG) + run_failure_reason = ( + RunFailureReason(run.tags.get(RUN_FAILURE_REASON_TAG)) + if run.tags.get(RUN_FAILURE_REASON_TAG) + else None + ) if will_retry_tag_value is None: # If the run doesn't have the WILL_RETRY_TAG, and the run is failed, we # recalculate if the run should be retried to ensure backward compatibility if run.status == DagsterRunStatus.FAILURE: - should_retry_run = auto_reexecution_should_retry_run(instance, run) + should_retry_run = auto_reexecution_should_retry_run(instance, run, run_failure_reason) # add the tag to the run so that it can be used in other parts of the system instance.add_run_tags(run.run_id, {WILL_RETRY_TAG: str(should_retry_run).lower()}) else: @@ -42,14 +52,14 @@ def should_retry(run: DagsterRun, instance: DagsterInstance) -> bool: if should_retry_run: return should_retry_run else: + # one of the reasons we may not retry a run is if it is a step failure and the system is + # set to not retry on op/asset failures. In this case, we log + # an engine event retry_on_asset_or_op_failure = get_boolean_tag_value( run.tags.get(RETRY_ON_ASSET_OR_OP_FAILURE_TAG), default_value=instance.run_retries_retry_on_asset_or_op_failure, ) - if ( - run.tags.get(RUN_FAILURE_REASON_TAG) == RunFailureReason.STEP_FAILURE.value - and not retry_on_asset_or_op_failure - ): + if run_failure_reason == RunFailureReason.STEP_FAILURE and not retry_on_asset_or_op_failure: instance.report_engine_event( "Not retrying run since it failed due to an asset or op failure and run retries " "are configured with retry_on_asset_or_op_failure set to false.", diff --git a/python_modules/dagster/dagster/_generate/download.py b/python_modules/dagster/dagster/_generate/download.py index c8b7ca7223f81..24668dc7da416 100644 --- a/python_modules/dagster/dagster/_generate/download.py +++ b/python_modules/dagster/dagster/_generate/download.py @@ -37,6 +37,8 @@ "feature_graph_backed_assets", "getting_started_etl_tutorial", "project_analytics", + "project_atproto_dashboard", + "project_dagster_modal_pipes", "project_dagster_university_start", "project_du_dbt_starter", "project_fully_featured", diff --git a/python_modules/dagster/dagster/_serdes/serdes.py b/python_modules/dagster/dagster/_serdes/serdes.py index 741ff44487cf2..7cc5efe798e2f 100644 --- a/python_modules/dagster/dagster/_serdes/serdes.py +++ b/python_modules/dagster/dagster/_serdes/serdes.py @@ -1423,3 +1423,27 @@ def get_storage_name(klass: Type, *, whitelist_map: WhitelistMap = _WHITELIST_MA check.failed(f"{klass.__name__} is not a known serializable object type.") ser = whitelist_map.object_serializers[klass.__name__] return ser.get_storage_name() + + +def get_storage_fields(klass: Type, whitelist_map: WhitelistMap = _WHITELIST_MAP): + if klass.__name__ not in whitelist_map.object_serializers: + check.failed(f"{klass.__name__} is not a known serializable object type.") + ser = whitelist_map.object_serializers[klass.__name__] + # get the current fields + current_fields = ser.constructor_param_names + # remap defined storage field names + stored_fields = [ser.storage_field_names.get(f, f) for f in current_fields] + # insert old fields + stored_fields.extend(ser.old_fields.keys()) + # we use sort_keys=True in the json dump + return sorted(stored_fields) + + +_OBJECT_START = '{"__class__":' + + +def get_prefix_for_a_serialized(klass: Type, *, whitelist_map=_WHITELIST_MAP): + """Returns the expected start of the string for a serdes serialized object + of the passed type.
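For example (illustrative), a hypothetical type stored under the name "Foo" serializes to a string starting with '{"__class__": "Foo"'.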
+ """ + return f'{_OBJECT_START} "{get_storage_name(klass, whitelist_map=whitelist_map)}"' diff --git a/python_modules/dagster/dagster/_utils/merger.py b/python_modules/dagster/dagster/_utils/merger.py index dc0e6401cde86..9995315a294a1 100644 --- a/python_modules/dagster/dagster/_utils/merger.py +++ b/python_modules/dagster/dagster/_utils/merger.py @@ -59,3 +59,13 @@ def merge_dicts(*args: Mapping[Any, Any]) -> Dict[Any, Any]: for arg in args: result.update(arg) return result + + +def reverse_dict(d: Mapping[V, K]) -> Dict[K, V]: + """Returns a new dictionary with the keys and values of the input dictionary swapped. + + If the input dictionary has duplicate values, the returned dictionary will have the value from + the last key that maps to it. + """ + check.dict_param(d, "d") + return {v: k for k, v in d.items()} diff --git a/python_modules/dagster/dagster_tests/api_tests/api_tests_repo.py b/python_modules/dagster/dagster_tests/api_tests/api_tests_repo.py index 8ad4e79a19c2d..0e45a4097d1a1 100644 --- a/python_modules/dagster/dagster_tests/api_tests/api_tests_repo.py +++ b/python_modules/dagster/dagster_tests/api_tests/api_tests_repo.py @@ -17,6 +17,7 @@ ) from dagster._core.definitions.asset_graph import AssetGraph from dagster._core.definitions.decorators.sensor_decorator import sensor +from dagster._core.definitions.metadata.metadata_value import MetadataValue from dagster._core.definitions.partition import PartitionedConfig, StaticPartitionsDefinition from dagster._core.definitions.sensor_definition import RunRequest from dagster._core.errors import DagsterError @@ -192,6 +193,12 @@ def sensor_raises_dagster_error(_): raise DagsterError("Dagster error") +@job(metadata={"pipeline_snapshot": MetadataValue.json({"pipeline_snapshot": "pipeline_snapshot"})}) +def pipeline_snapshot(): + do_something() + do_fail() + + @repository(metadata={"string": "foo", "integer": 123}) def bar_repo(): return { @@ -201,9 +208,10 @@ def bar_repo(): "dynamic_job": define_asset_job( "dynamic_job", [dynamic_asset], partitions_def=dynamic_partitions_def ).resolve(asset_graph=AssetGraph.from_assets([dynamic_asset])), - "fail": fail_job, + "fail_job": fail_job, "foo": foo_job, "forever": forever_job, + "pipeline_snapshot": pipeline_snapshot.get_subset(op_selection=["do_something"]), }, "schedules": define_bar_schedules(), "sensors": { diff --git a/python_modules/dagster/dagster_tests/api_tests/test_api_snapshot_repository.py b/python_modules/dagster/dagster_tests/api_tests/test_api_snapshot_repository.py index dc854e851f0f1..5f75fb0a7a0ca 100644 --- a/python_modules/dagster/dagster_tests/api_tests/test_api_snapshot_repository.py +++ b/python_modules/dagster/dagster_tests/api_tests/test_api_snapshot_repository.py @@ -15,12 +15,19 @@ RepositorySnap, ) from dagster._core.remote_representation.external import RemoteRepository -from dagster._core.remote_representation.external_data import JobDataSnap +from dagster._core.remote_representation.external_data import ( + DISABLE_FAST_EXTRACT_ENV_VAR, + JobDataSnap, + JobRefSnap, + extract_serialized_job_snap_from_serialized_job_data_snap, +) from dagster._core.remote_representation.handle import RepositoryHandle from dagster._core.remote_representation.origin import RemoteRepositoryOrigin from dagster._core.test_utils import instance_for_test from dagster._core.types.loadable_target_origin import LoadableTargetOrigin -from dagster._serdes.serdes import deserialize_value +from dagster._serdes.serdes import deserialize_value, get_storage_fields +from dagster._serdes.utils 
import hash_str +from dagster._utils.env import environ from dagster_tests.api_tests.utils import get_bar_repo_code_location @@ -121,8 +128,9 @@ def test_giant_external_repository_streaming_grpc(): assert repository_snap.name == "giant_repo" -def test_defer_snapshots(instance: DagsterInstance): - with get_bar_repo_code_location(instance) as code_location: +@pytest.mark.parametrize("env", [{}, {DISABLE_FAST_EXTRACT_ENV_VAR: "true"}]) +def test_defer_snapshots(instance: DagsterInstance, env): + with get_bar_repo_code_location(instance) as code_location, environ(env): repo_origin = RemoteRepositoryOrigin( code_location.origin, "bar_repo", @@ -135,16 +143,27 @@ def test_defer_snapshots(instance: DagsterInstance): _state = {} - def _ref_to_data(ref): + def _ref_to_data(ref: JobRefSnap): _state["cnt"] = _state.get("cnt", 0) + 1 reply = code_location.client.external_job( repo_origin, ref.name, ) - return deserialize_value(reply.serialized_job_data, JobDataSnap) + assert reply.serialized_job_data, reply.serialized_error + assert ( + hash_str( + extract_serialized_job_snap_from_serialized_job_data_snap( + reply.serialized_job_data + ) + ) + == ref.snapshot_id + ), ref.name + + job_data_snap = deserialize_value(reply.serialized_job_data, JobDataSnap) + return job_data_snap repository_snap = deserialize_value(ser_repo_data, RepositorySnap) - assert repository_snap.job_refs and len(repository_snap.job_refs) == 6 + assert repository_snap.job_refs and len(repository_snap.job_refs) == 7 assert repository_snap.job_datas is None repo = RemoteRepository( @@ -154,7 +173,7 @@ def _ref_to_data(ref): ref_to_data_fn=_ref_to_data, ) jobs = repo.get_all_jobs() - assert len(jobs) == 6 + assert len(jobs) == 7 assert _state.get("cnt", 0) == 0 job = jobs[0] @@ -177,6 +196,15 @@ def _ref_to_data(ref): assert _state.get("cnt", 0) == 1 # refetching job should share fetched data - job = repo.get_all_jobs()[0] - _ = job.job_snapshot - assert _state.get("cnt", 0) == 1 + expected = 1 # from job[0] access + for job in repo.get_all_jobs(): + _ = job.job_snapshot + assert _state.get("cnt", 0) == expected + expected += 1 + + +def test_job_data_snap_layout(): + # defend against assumptions made in + + # must remain last position + assert get_storage_fields(JobDataSnap)[-1] == "pipeline_snapshot" diff --git a/python_modules/dagster/dagster_tests/asset_defs_tests/test_antlr_asset_selection.py b/python_modules/dagster/dagster_tests/asset_defs_tests/test_antlr_asset_selection.py index f3c4fcb11935f..c72ea9451bcfe 100644 --- a/python_modules/dagster/dagster_tests/asset_defs_tests/test_antlr_asset_selection.py +++ b/python_modules/dagster/dagster_tests/asset_defs_tests/test_antlr_asset_selection.py @@ -144,11 +144,11 @@ def test_antlr_tree_invalid(selection_str): ), ("sinks(key:a)", AssetSelection.assets("a").sinks()), ("roots(key:c)", AssetSelection.assets("c").roots()), - ("tag:foo", AssetSelection.tag("foo", "")), - ("tag:foo=bar", AssetSelection.tag("foo", "bar")), + ("tag:foo", AssetSelection.tag("foo", "", include_sources=True)), + ("tag:foo=bar", AssetSelection.tag("foo", "bar", include_sources=True)), ('owner:"owner@owner.com"', AssetSelection.owner("owner@owner.com")), - ("group:my_group", AssetSelection.groups("my_group")), - ("kind:my_kind", AssetSelection.tag(f"{KIND_PREFIX}my_kind", "")), + ("group:my_group", AssetSelection.groups("my_group", include_sources=True)), + ("kind:my_kind", AssetSelection.tag(f"{KIND_PREFIX}my_kind", "", include_sources=True)), ( "code_location:my_location", 
CodeLocationAssetSelection(selected_code_location="my_location"), diff --git a/python_modules/dagster/dagster_tests/cli_tests/test_api_commands.py b/python_modules/dagster/dagster_tests/cli_tests/test_api_commands.py index 1c483394bd1c2..6fc1739fa8211 100644 --- a/python_modules/dagster/dagster_tests/cli_tests/test_api_commands.py +++ b/python_modules/dagster/dagster_tests/cli_tests/test_api_commands.py @@ -179,7 +179,7 @@ def test_execute_run_fail_job(): } ) as instance: with get_bar_repo_handle(instance) as repo_handle: - job_handle = JobHandle("fail", repo_handle) + job_handle = JobHandle("fail_job", repo_handle) runner = CliRunner() run = create_run_for_test( diff --git a/python_modules/dagster/dagster_tests/core_tests/instance_tests/test_instance.py b/python_modules/dagster/dagster_tests/core_tests/instance_tests/test_instance.py index 11d6c94ddafea..fa4e970511638 100644 --- a/python_modules/dagster/dagster_tests/core_tests/instance_tests/test_instance.py +++ b/python_modules/dagster/dagster_tests/core_tests/instance_tests/test_instance.py @@ -37,6 +37,7 @@ from dagster._core.launcher import LaunchRunContext, RunLauncher from dagster._core.run_coordinator.queued_run_coordinator import QueuedRunCoordinator from dagster._core.snap import create_execution_plan_snapshot_id, snapshot_from_execution_plan +from dagster._core.storage.asset_check_execution_record import AssetCheckExecutionRecordStatus from dagster._core.storage.partition_status_cache import AssetPartitionStatus, AssetStatusCacheValue from dagster._core.storage.sqlite_storage import ( _event_logs_directory, @@ -742,7 +743,7 @@ def test_get_status_by_partition(mock_get_and_update): assert partition_status == {"2023-07-01": AssetPartitionStatus.IN_PROGRESS} -def test_report_runless_asset_event(): +def test_report_runless_asset_event() -> None: with instance_for_test() as instance: my_asset_key = AssetKey("my_asset") @@ -768,6 +769,22 @@ def test_report_runless_asset_event(): limit=1, ) assert len(records) == 1 + assert records[0].status == AssetCheckExecutionRecordStatus.SUCCEEDED + + instance.report_runless_asset_event( + AssetCheckEvaluation( + asset_key=my_asset_key, + check_name=my_check, + passed=False, + metadata={}, + ) + ) + records = instance.event_log_storage.get_asset_check_execution_history( + check_key=AssetCheckKey(asset_key=my_asset_key, name=my_check), + limit=1, + ) + assert len(records) == 1 + assert records[0].status == AssetCheckExecutionRecordStatus.FAILED def test_invalid_run_id(): diff --git a/python_modules/dagster/dagster_tests/core_tests/pythonic_config_tests/test_basic_pythonic_config.py b/python_modules/dagster/dagster_tests/core_tests/pythonic_config_tests/test_basic_pythonic_config.py index 0098cb7588ac2..9e7941ed83a15 100644 --- a/python_modules/dagster/dagster_tests/core_tests/pythonic_config_tests/test_basic_pythonic_config.py +++ b/python_modules/dagster/dagster_tests/core_tests/pythonic_config_tests/test_basic_pythonic_config.py @@ -707,7 +707,7 @@ def test_structured_run_config_optional() -> None: class ANewConfigOpConfig(Config): a_string: Optional[str] an_int: Optional[int] = None - a_float: float = PyField(None) + a_float: float = PyField(None) # type: ignore executed = {} @@ -805,7 +805,7 @@ def my_asset(config: AnAssetConfig): def test_structured_run_config_assets_optional() -> None: class AnAssetConfig(Config): - a_string: str = PyField(None) + a_string: str = PyField(None) # type: ignore an_int: Optional[int] = None executed = {} diff --git 
a/python_modules/dagster/dagster_tests/daemon_tests/test_backfill.py b/python_modules/dagster/dagster_tests/daemon_tests/test_backfill.py index 9f6bad53bb2ed..740e8bb9ae0a3 100644 --- a/python_modules/dagster/dagster_tests/daemon_tests/test_backfill.py +++ b/python_modules/dagster/dagster_tests/daemon_tests/test_backfill.py @@ -11,6 +11,7 @@ from dagster import ( AllPartitionMapping, Any, + AssetDep, AssetExecutionContext, AssetIn, AssetKey, @@ -24,6 +25,7 @@ Out, Output, StaticPartitionMapping, + TimeWindowPartitionMapping, _seven, asset, daily_partitioned_config, @@ -68,6 +70,7 @@ DagsterRunStatus, RunsFilter, ) +from dagster._core.storage.partition_status_cache import AssetPartitionStatus from dagster._core.storage.tags import ( ASSET_PARTITION_RANGE_END_TAG, ASSET_PARTITION_RANGE_START_TAG, @@ -402,18 +405,40 @@ def daily_2(daily_1): partitions_def=daily_partitions_def, backfill_policy=BackfillPolicy.single_run(), ) -def asset_with_single_run_backfill_policy(): - return 1 +def asset_with_single_run_backfill_policy() -> None: + pass @asset( partitions_def=daily_partitions_def, backfill_policy=BackfillPolicy.multi_run(), ) -def asset_with_multi_run_backfill_policy(): +def asset_with_multi_run_backfill_policy() -> None: pass +@asset( + partitions_def=daily_partitions_def, + backfill_policy=BackfillPolicy.single_run(), + deps=[ + asset_with_single_run_backfill_policy, + AssetDep( + "complex_asset_with_backfill_policy", + partition_mapping=TimeWindowPartitionMapping(start_offset=-1, end_offset=-1), + ), + ], +) +def complex_asset_with_backfill_policy(context: AssetExecutionContext) -> None: + statuses = context.instance.get_status_by_partition( + asset_key=asset_with_single_run_backfill_policy.key, + partition_keys=context.partition_keys, + partitions_def=daily_partitions_def, + ) + assert statuses + context.log.info(f"got {statuses}") + assert all(status == AssetPartitionStatus.MATERIALIZED for status in statuses.values()) + + asset_job_partitions = StaticPartitionsDefinition(["a", "b", "c", "d"]) @@ -490,6 +515,7 @@ def the_repo(): downstream_of_fails_once_asset_c, asset_with_single_run_backfill_policy, asset_with_multi_run_backfill_policy, + complex_asset_with_backfill_policy, bp_single_run, bp_single_run_config, bp_multi_run, @@ -2276,6 +2302,75 @@ def test_asset_backfill_with_multi_run_backfill_policy( ] +def test_complex_asset_with_backfill_policy( + instance: DagsterInstance, workspace_context: WorkspaceProcessContext +): + # repro of bug + partitions = ["2023-01-01", "2023-01-02", "2023-01-03"] + asset_graph = workspace_context.create_request_context().asset_graph + + backfill_id = "complex_asset_with_backfills" + backfill = PartitionBackfill.from_partitions_by_assets( + backfill_id=backfill_id, + asset_graph=asset_graph, + backfill_timestamp=get_current_timestamp(), + tags={}, + dynamic_partitions_store=instance, + partitions_by_assets=[ + PartitionsByAssetSelector( + asset_key=asset_with_single_run_backfill_policy.key, + partitions=PartitionsSelector( + [PartitionRangeSelector(partitions[0], partitions[-1])] + ), + ), + PartitionsByAssetSelector( + asset_key=complex_asset_with_backfill_policy.key, + partitions=PartitionsSelector( + [PartitionRangeSelector(partitions[0], partitions[-1])] + ), + ), + ], + title=None, + description=None, + ) + instance.add_backfill(backfill) + + assert instance.get_runs_count() == 0 + backfill = instance.get_backfill(backfill_id) + assert backfill + assert backfill.status == BulkActionStatus.REQUESTED + assert backfill.asset_selection == [ + 
asset_with_single_run_backfill_policy.key, + complex_asset_with_backfill_policy.key, + ] + + assert all( + not error + for error in list( + execute_backfill_iteration( + workspace_context, get_default_daemon_logger("BackfillDaemon") + ) + ) + ) + + # 1 run for the full range + assert instance.get_runs_count() == 1 + wait_for_all_runs_to_start(instance, timeout=30) + wait_for_all_runs_to_finish(instance, timeout=30) + + assert all( + not error + for error in list( + execute_backfill_iteration( + workspace_context, get_default_daemon_logger("BackfillDaemon") + ) + ) + ) + backfill = instance.get_backfill(backfill_id) + assert backfill + assert backfill.status == BulkActionStatus.COMPLETED_SUCCESS + + def test_error_code_location( caplog, instance, workspace_context, unloadable_location_workspace_context ): @@ -2642,19 +2737,28 @@ def test_old_dynamic_partitions_job_backfill( assert instance.get_runs_count() == 4 +@pytest.fixture +def instance_with_backfill_log_storage_enabled(instance): + def override_backfill_storage_setting(self): + return True + + orig_backfill_storage_setting = instance.backfill_log_storage_enabled + + try: + instance.backfill_log_storage_enabled = override_backfill_storage_setting.__get__( + instance, DagsterInstance + ) + yield instance + finally: + instance.backfill_log_storage_enabled = orig_backfill_storage_setting + + def test_asset_backfill_logs( - instance: DagsterInstance, + instance_with_backfill_log_storage_enabled: DagsterInstance, workspace_context: WorkspaceProcessContext, remote_repo: RemoteRepository, ): - # need to override this method on the instance since it defaults ot False in OSS. When we enable this - # feature in OSS we can remove this override - def override_backfill_storage_setting(self): - return True - - instance.backfill_log_storage_enabled = override_backfill_storage_setting.__get__( - instance, DagsterInstance - ) + instance = instance_with_backfill_log_storage_enabled partition_keys = static_partitions.get_partition_keys() asset_selection = [AssetKey("foo"), AssetKey("a1"), AssetKey("bar")] diff --git a/python_modules/dagster/dagster_tests/definitions_tests/freshness_checks_tests/test_sensor.py b/python_modules/dagster/dagster_tests/definitions_tests/freshness_checks_tests/test_sensor.py index ca549b1c40df7..b7881d5a79ba9 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/freshness_checks_tests/test_sensor.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/freshness_checks_tests/test_sensor.py @@ -21,12 +21,14 @@ from dagster._core.definitions.asset_out import AssetOut from dagster._core.definitions.decorators.asset_decorator import multi_asset from dagster._core.definitions.definitions_class import Definitions +from dagster._core.definitions.events import AssetMaterialization from dagster._core.definitions.metadata import FloatMetadataValue from dagster._core.definitions.run_request import RunRequest, SkipReason from dagster._core.definitions.sensor_definition import build_sensor_context from dagster._core.events import DagsterEvent, DagsterEventType from dagster._core.events.log import EventLogEntry -from dagster._core.test_utils import freeze_time +from dagster._core.storage.tags import SENSOR_NAME_TAG +from dagster._core.test_utils import create_run_for_test, freeze_time from dagster._core.utils import make_new_run_id from dagster._time import get_current_datetime @@ -132,7 +134,7 @@ def my_asset(): def test_sensor_evaluation_planned(instance: DagsterInstance) -> None: - """Test the case where the asset 
check is currently planned to evaluate. We shouldn't attempt to re-evalaute the check in this case.""" + """Test the case where the asset check is currently planned to evaluate, and has never previously evaluated. We shouldn't be kicking off a new run of the check.""" @asset def my_asset(): @@ -142,6 +144,7 @@ def my_asset(): assets=[my_asset], lower_bound_delta=datetime.timedelta(minutes=10) ) + # Check has never completed evaluation but is in flight. We should skip the check. frozen_time = get_current_datetime() with freeze_time(frozen_time): instance.event_log_storage.store_event( @@ -170,6 +173,238 @@ def my_asset(): assert context.cursor is None +def test_sensor_eval_planned_prev_success(instance: DagsterInstance) -> None: + """Test the case where the asset check is currently planned to evaluate, and has previously evaluated successfully. We should be kicking off a run of the check once the freshness interval has passed.""" + + @asset + def my_asset(): + pass + + freshness_checks = build_last_update_freshness_checks( + assets=[my_asset], lower_bound_delta=datetime.timedelta(minutes=10) + ) + + # Check has never completed evaluation but is in flight. We should skip the check. + frozen_time = get_current_datetime() + with freeze_time(frozen_time - datetime.timedelta(minutes=5)): + instance.report_runless_asset_event( + AssetCheckEvaluation( + asset_key=my_asset.key, + check_name="freshness_check", + passed=True, + metadata={ + FRESH_UNTIL_METADATA_KEY: FloatMetadataValue(frozen_time.timestamp() + 5) + }, + ) + ) + with freeze_time(frozen_time): + instance.event_log_storage.store_event( + EventLogEntry( + error_info=None, + user_message="", + level="debug", + run_id=make_new_run_id(), + timestamp=time.time(), + dagster_event=DagsterEvent( + DagsterEventType.ASSET_CHECK_EVALUATION_PLANNED.value, + "nonce", + event_specific_data=AssetCheckEvaluationPlanned( + asset_key=my_asset.key, check_name="freshness_check" + ), + ), + ) + ) + sensor = build_sensor_for_freshness_checks(freshness_checks=freshness_checks) + defs = Definitions(asset_checks=freshness_checks, assets=[my_asset], sensors=[sensor]) + context = build_sensor_context(instance=instance, definitions=defs) + + # Upon evaluation, we do not yet expect a run request, since the freshness interval has not yet passed. + result = sensor(context) + assert isinstance(result, SkipReason) + # Cursor should be None, since we made it through all assets. + assert context.cursor is None + + # Move time forward to when the check should be evaluated. + with freeze_time(frozen_time + datetime.timedelta(minutes=6)): + # Upon evaluation, we should get a run request for the asset check. + run_request = sensor(context) + assert isinstance(run_request, RunRequest) + assert run_request.asset_check_keys == [AssetCheckKey(my_asset.key, "freshness_check")] + # Cursor should be None, since we made it through all assets. + assert context.cursor is None + + +def test_sensor_eval_planned_prev_failed(instance: DagsterInstance) -> None: + """Test the case where the asset check is currently planned to evaluate, and has previously evaluated unsuccessfully. We should not be kicking off a run of the check.""" + + @asset + def my_asset(): + pass + + freshness_checks = build_last_update_freshness_checks( + assets=[my_asset], lower_bound_delta=datetime.timedelta(minutes=10) + ) + + # Check has never completed evaluation but is in flight. We should skip the check. 
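The tests in this hunk repeat the same synthetic-event setup to put an asset check into a planned (in-flight) state. A hypothetical helper along these lines could factor that out; this is only a sketch, and the `AssetCheckEvaluationPlanned` import path is assumed from the test module's existing imports rather than shown in this hunk:

```python
# Hypothetical helper (not part of this change): store a synthetic
# ASSET_CHECK_EVALUATION_PLANNED event so the freshness sensor sees an
# in-flight evaluation for the given asset check.
import time
from typing import Optional

from dagster import AssetKey, DagsterInstance
from dagster._core.definitions.asset_check_evaluation import AssetCheckEvaluationPlanned
from dagster._core.events import DagsterEvent, DagsterEventType
from dagster._core.events.log import EventLogEntry
from dagster._core.utils import make_new_run_id


def store_planned_check_evaluation(
    instance: DagsterInstance,
    asset_key: AssetKey,
    check_name: str = "freshness_check",
    run_id: Optional[str] = None,
) -> None:
    """Simulate a planned (in-flight) evaluation for the given asset check."""
    instance.event_log_storage.store_event(
        EventLogEntry(
            error_info=None,
            user_message="",
            level="debug",
            run_id=run_id or make_new_run_id(),
            timestamp=time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.ASSET_CHECK_EVALUATION_PLANNED.value,
                "nonce",
                event_specific_data=AssetCheckEvaluationPlanned(
                    asset_key=asset_key, check_name=check_name
                ),
            ),
        )
    )
```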
+ frozen_time = get_current_datetime() + with freeze_time(frozen_time - datetime.timedelta(minutes=5)): + instance.report_runless_asset_event( + AssetCheckEvaluation( + asset_key=my_asset.key, + check_name="freshness_check", + passed=False, + metadata={}, + ) + ) + with freeze_time(frozen_time): + instance.event_log_storage.store_event( + EventLogEntry( + error_info=None, + user_message="", + level="debug", + run_id=make_new_run_id(), + timestamp=time.time(), + dagster_event=DagsterEvent( + DagsterEventType.ASSET_CHECK_EVALUATION_PLANNED.value, + "nonce", + event_specific_data=AssetCheckEvaluationPlanned( + asset_key=my_asset.key, check_name="freshness_check" + ), + ), + ) + ) + sensor = build_sensor_for_freshness_checks(freshness_checks=freshness_checks) + defs = Definitions(asset_checks=freshness_checks, assets=[my_asset], sensors=[sensor]) + context = build_sensor_context(instance=instance, definitions=defs) + + # Upon evaluation, we should not get a run request for the asset check. + result = sensor(context) + assert isinstance(result, SkipReason) + # Cursor should be None, since we made it through all assets. + assert context.cursor is None + + +def test_sensor_eval_failed_and_outdated(instance: DagsterInstance) -> None: + """Test the case where the asset check has previously failed, but the result is now out of date. We should kick off a new check evaluation.""" + + @asset + def my_asset(): + pass + + freshness_checks = build_last_update_freshness_checks( + assets=[my_asset], lower_bound_delta=datetime.timedelta(minutes=10) + ) + + frozen_time = get_current_datetime() + with freeze_time(frozen_time - datetime.timedelta(minutes=5)): + instance.report_runless_asset_event( + AssetCheckEvaluation( + asset_key=my_asset.key, + check_name="freshness_check", + passed=False, + metadata={}, + ) + ) + # Freshness check has previously failed, but we've since received a materialization for the asset making it out of date. + with freeze_time(frozen_time): + instance.report_runless_asset_event(AssetMaterialization(asset_key=my_asset.key)) + sensor = build_sensor_for_freshness_checks(freshness_checks=freshness_checks) + defs = Definitions(asset_checks=freshness_checks, assets=[my_asset], sensors=[sensor]) + context = build_sensor_context(instance=instance, definitions=defs) + + # Upon evaluation, we should get a run request for the asset check. + run_request = sensor(context) + assert isinstance(run_request, RunRequest) + assert run_request.asset_check_keys == [AssetCheckKey(my_asset.key, "freshness_check")] + # Cursor should be None, since we made it through all assets. + assert context.cursor is None + + +def test_sensor_eval_planned_and_launched_by_sensor(instance: DagsterInstance) -> None: + """Test the case where the asset check is currently planned to evaluate, but the sensor is what launched the in-flight evaluation. 
We should not kick off a new evaluation.""" + + @asset + def my_asset(): + pass + + freshness_checks = build_last_update_freshness_checks( + assets=[my_asset], lower_bound_delta=datetime.timedelta(minutes=10) + ) + sensor = build_sensor_for_freshness_checks(freshness_checks=freshness_checks, name="my_sensor") + defs = Definitions(asset_checks=freshness_checks, assets=[my_asset], sensors=[sensor]) + + frozen_time = get_current_datetime() + with freeze_time(frozen_time - datetime.timedelta(minutes=5)): + run_id = make_new_run_id() + # Create a run, simulate started by this sensor + create_run_for_test( + instance=instance, + run_id=run_id, + job_name="my_sensor", + tags={SENSOR_NAME_TAG: "my_sensor"}, + ) + instance.event_log_storage.store_event( + EventLogEntry( + error_info=None, + user_message="", + level="debug", + run_id=run_id, + timestamp=time.time(), + dagster_event=DagsterEvent( + DagsterEventType.ASSET_CHECK_EVALUATION_PLANNED.value, + "nonce", + event_specific_data=AssetCheckEvaluationPlanned( + asset_key=my_asset.key, check_name="freshness_check" + ), + ), + ) + ) + + with freeze_time(frozen_time): + context = build_sensor_context(instance=instance, definitions=defs) + skip_reason = sensor(context) + assert isinstance(skip_reason, SkipReason) + + +def test_sensor_eval_success_and_outdated(instance: DagsterInstance) -> None: + """Test the case where the asset check has previously succeeded, but the result is now out of date. We should not kick off an evaluation unless FRESH_UNTIL_TIMESTAMP has passed.""" + + @asset + def my_asset(): + pass + + freshness_checks = build_last_update_freshness_checks( + assets=[my_asset], lower_bound_delta=datetime.timedelta(minutes=10) + ) + + frozen_time = get_current_datetime() + with freeze_time(frozen_time - datetime.timedelta(minutes=5)): + instance.report_runless_asset_event( + AssetCheckEvaluation( + asset_key=my_asset.key, + check_name="freshness_check", + passed=True, + metadata={ + FRESH_UNTIL_METADATA_KEY: FloatMetadataValue( + (frozen_time + datetime.timedelta(minutes=5)).timestamp() + ) + }, + ) + ) + # Freshness check has previously succeeded, but we've since received a materialization for the asset making it out of date. + with freeze_time(frozen_time): + instance.report_runless_asset_event(AssetMaterialization(asset_key=my_asset.key)) + sensor = build_sensor_for_freshness_checks(freshness_checks=freshness_checks) + defs = Definitions(asset_checks=freshness_checks, assets=[my_asset], sensors=[sensor]) + context = build_sensor_context(instance=instance, definitions=defs) + + # Upon evaluation, we should not get a run request for the asset check. + skip_reason = sensor(context) + assert isinstance(skip_reason, SkipReason) + # Cursor should be None, since we made it through all assets. 
+ assert context.cursor is None + + def test_sensor_cursor_recovery(instance: DagsterInstance) -> None: """Test the case where we have a cursor to evaluate from.""" diff --git a/python_modules/dagster/dagster_tests/definitions_tests/test_asset_spec.py b/python_modules/dagster/dagster_tests/definitions_tests/test_asset_spec.py index ddbe373d04b22..a47062ec3a623 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/test_asset_spec.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/test_asset_spec.py @@ -2,7 +2,14 @@ import dagster as dg import pytest -from dagster import AssetSpec, AutoMaterializePolicy, AutomationCondition +from dagster import ( + AssetSpec, + AutoMaterializePolicy, + AutomationCondition, + IdentityPartitionMapping, + LastPartitionMapping, +) +from dagster._check import CheckError from dagster._core.definitions.asset_dep import AssetDep from dagster._core.definitions.asset_key import AssetKey from dagster._core.definitions.assets import AssetsDefinition @@ -230,3 +237,123 @@ def my_other_multi_asset(): assert all( spec.owners == ["ben@dagsterlabs.com"] for asset in mapped_assets for spec in asset.specs ) + + +def test_map_asset_specs_additional_deps() -> None: + @dg.multi_asset(specs=[AssetSpec(key="a")]) + def my_asset(): + pass + + @dg.multi_asset(specs=[AssetSpec(key="c", deps=["a"])]) + def my_other_asset(): + pass + + assets = [my_asset, my_other_asset] + + mapped_assets = dg.map_asset_specs( + lambda spec: spec.merge_attributes(deps=["b"]) if spec.key == my_other_asset.key else spec, + assets, + ) + + c_asset = next(iter(asset for asset in mapped_assets if asset.key == my_other_asset.key)) + assert set(next(iter(c_asset.specs)).deps) == {AssetDep("a"), AssetDep("b")} + + +def test_map_asset_specs_multiple_deps_same_key() -> None: + @dg.multi_asset(specs=[AssetSpec(key="a", deps=[AssetDep("b")])]) + def my_asset(): + pass + + # This works because the dep is coerced to an identical object. + + dg.map_asset_specs(lambda spec: spec.merge_attributes(deps=[AssetKey("b")]), [my_asset]) + + # This doesn't work because we change the object. 
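The two comments in `test_map_asset_specs_multiple_deps_same_key` state the rule that drives its behavior: a bare asset key coerces to an `AssetDep` equal to the existing one, while attaching a partition mapping yields a distinct dep for the same key. A small illustrative sketch of that rule (not part of the change), using the public imports seen elsewhere in this diff:

```python
# Illustrative only: why merging AssetKey("b") into a spec that already depends on "b"
# is accepted, while merging a dep on "b" with a partition mapping is rejected.
from dagster import AssetDep, AssetKey, LastPartitionMapping

# A bare key coerces to an AssetDep equal to the existing one, so the merge is a no-op.
assert AssetDep("b") == AssetDep(AssetKey("b"))

# Adding a partition mapping produces a different AssetDep for the same key, which
# map_asset_specs refuses to merge with the existing dep.
assert AssetDep("b") != AssetDep("b", partition_mapping=LastPartitionMapping())
```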
+ with pytest.raises(DagsterInvariantViolationError): + dg.map_asset_specs( + lambda spec: spec.merge_attributes( + deps=[AssetDep(AssetKey("b"), partition_mapping=LastPartitionMapping())] + ), + [my_asset], + ) + + +def test_map_asset_specs_nonarg_dep_removal() -> None: + @dg.multi_asset(specs=[AssetSpec(key="a", deps=[AssetDep("b")])]) + def my_asset(): + pass + + new_asset = next( + iter(dg.map_asset_specs(lambda spec: spec.replace_attributes(deps=[]), [my_asset])) + ) + new_spec = next(iter(new_asset.specs)) + assert new_spec.deps == [] + # Ensure that dep removal propogated to the underlying op + assert new_asset.keys_by_input_name == {} + assert len(new_asset.op.input_defs) == 0 + + +def test_map_asset_specs_arg_dep_removal() -> None: + @dg.asset(key="a") + def my_asset(b): + pass + + with pytest.raises(CheckError): + dg.map_asset_specs(lambda spec: spec.replace_attributes(deps=[]), [my_asset]) + + +def test_map_additional_deps_partition_mapping() -> None: + @dg.multi_asset( + specs=[AssetSpec(key="a", deps=[AssetDep("b", partition_mapping=LastPartitionMapping())])] + ) + def my_asset(): + pass + + a_asset = next( + iter( + dg.map_asset_specs( + lambda spec: spec.merge_attributes( + deps=[AssetDep("c", partition_mapping=IdentityPartitionMapping())] + ), + [my_asset], + ) + ) + ) + a_spec = next(iter(a_asset.specs)) + b_dep = next(iter(dep for dep in a_spec.deps if dep.asset_key == AssetKey("b"))) + assert b_dep.partition_mapping == LastPartitionMapping() + c_dep = next(iter(dep for dep in a_spec.deps if dep.asset_key == AssetKey("c"))) + assert c_dep.partition_mapping == IdentityPartitionMapping() + assert a_asset.get_partition_mapping(AssetKey("c")) == IdentityPartitionMapping() + assert a_asset.get_partition_mapping(AssetKey("b")) == LastPartitionMapping() + + +def test_add_specs_non_executable_asset() -> None: + assets_def = ( + dg.Definitions(assets=[AssetSpec(key="foo")]) + .get_repository_def() + .assets_defs_by_key[AssetKey("foo")] + ) + foo_spec = next( + iter( + next( + iter( + dg.map_asset_specs(lambda spec: spec.merge_attributes(deps=["a"]), [assets_def]) + ) + ).specs + ) + ) + assert foo_spec.deps == [AssetDep("a")] + + +def test_graph_backed_asset_additional_deps() -> None: + @dg.op + def foo_op(): + pass + + @dg.graph_asset() + def foo(): + return foo_op() + + with pytest.raises(CheckError): + dg.map_asset_specs(lambda spec: spec.merge_attributes(deps=["baz"]), [foo]) diff --git a/python_modules/dagster/dagster_tests/definitions_tests/test_tags.py b/python_modules/dagster/dagster_tests/definitions_tests/test_tags.py index 17d72c3baec15..cf2fdb1ae96ed 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/test_tags.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/test_tags.py @@ -1,4 +1,7 @@ -from dagster import job, op +import pytest +from dagster import ScheduleDefinition, job, op +from dagster._core.errors import DagsterInvalidDefinitionError +from dagster._core.storage.tags import RESUME_RETRY_TAG, RETRY_STRATEGY_TAG def test_op_tags(): @@ -45,3 +48,22 @@ def no_tags_job(): noop_op() assert no_tags_job.get_subset(op_selection=["noop_op"]).tags == {} + + +def test_user_editable_system_tags(): + @op + def noop_op(_): + pass + + @job + def noop_job(): + noop_op() + + ScheduleDefinition( + job=noop_job, cron_schedule="* * * * *", tags={RETRY_STRATEGY_TAG: "ALL_STEPS"} + ) + + with pytest.raises( + DagsterInvalidDefinitionError, match="Attempted to set tag with reserved system prefix" + ): + ScheduleDefinition(job=noop_job, cron_schedule="* * 
* * *", tags={RESUME_RETRY_TAG: "true"}) diff --git a/python_modules/dagster/dagster_tests/general_tests/test_serdes.py b/python_modules/dagster/dagster_tests/general_tests/test_serdes.py index e39e158e89961..57bc1ac5a378a 100644 --- a/python_modules/dagster/dagster_tests/general_tests/test_serdes.py +++ b/python_modules/dagster/dagster_tests/general_tests/test_serdes.py @@ -22,6 +22,7 @@ WhitelistMap, _whitelist_for_serdes, deserialize_value, + get_prefix_for_a_serialized, get_storage_name, pack_value, serialize_value, @@ -445,6 +446,7 @@ class Foo(NamedTuple): val = Foo("red") serialized = serialize_value(val, whitelist_map=test_map) + assert serialized.startswith(get_prefix_for_a_serialized(Foo, whitelist_map=test_map)) deserialized = deserialize_value(serialized, whitelist_map=test_map) assert deserialized == val @@ -465,12 +467,14 @@ class Bar(NamedTuple): val = Foo("red") serialized = serialize_value(val, whitelist_map=test_env) assert serialized == '{"__class__": "Bar", "color": "red"}' + assert serialized.startswith(get_prefix_for_a_serialized(Foo, whitelist_map=test_env)) deserialized = deserialize_value(serialized, whitelist_map=test_env) assert deserialized == val val = Bar("square") serialized = serialize_value(val, whitelist_map=test_env) assert serialized == '{"__class__": "Foo", "shape": "square"}' + assert serialized.startswith(get_prefix_for_a_serialized(Bar, whitelist_map=test_env)) deserialized = deserialize_value(serialized, whitelist_map=test_env) assert deserialized == val diff --git a/python_modules/dagster/dagster_tests/storage_tests/test_upath_io_manager.py b/python_modules/dagster/dagster_tests/storage_tests/test_upath_io_manager.py index da3b9ee155a37..be3858701e26f 100644 --- a/python_modules/dagster/dagster_tests/storage_tests/test_upath_io_manager.py +++ b/python_modules/dagster/dagster_tests/storage_tests/test_upath_io_manager.py @@ -464,7 +464,7 @@ def my_asset() -> Any: class AsyncJSONIOManager(ConfigurableIOManager, UPathIOManager): - base_dir: str = PydanticField(None, description="Base directory for storing files.") + base_dir: str = PydanticField(None, description="Base directory for storing files.") # type: ignore _base_path: UPath = PrivateAttr() diff --git a/python_modules/dagster/dagster_tests/storage_tests/utils/event_log_storage.py b/python_modules/dagster/dagster_tests/storage_tests/utils/event_log_storage.py index 55b130a2c0bef..1bfbd2ec23fbc 100644 --- a/python_modules/dagster/dagster_tests/storage_tests/utils/event_log_storage.py +++ b/python_modules/dagster/dagster_tests/storage_tests/utils/event_log_storage.py @@ -271,7 +271,13 @@ def _default_loggers(event_callback): # This exists to create synthetic events to test the store def _synthesize_events( - ops_fn_or_assets, run_id=None, check_success=True, instance=None, run_config=None, tags=None + ops_fn_or_assets, + run_id=None, + check_success=True, + instance=None, + run_config=None, + tags=None, + job_name=None, ) -> Tuple[List[EventLogEntry], JobExecutionResult]: events = [] @@ -290,6 +296,7 @@ def _append_event(event): else: # op_fn @job( + name=job_name, resource_defs=_default_resources(), logger_defs=_default_loggers(_append_event), executor_def=in_process_executor, @@ -2058,6 +2065,61 @@ def _get_storage_ids(result): ) assert _get_storage_ids(result) == [storage_id_3, storage_id_1] + def test_fetch_run_status_monitor_filters(self, storage, instance): + if not storage.supports_run_status_change_job_name_filter: + # test sqlite in test_get_event_records_sqlite + pytest.skip() + + @op + 
def my_op(_): + yield Output(1) + + def _ops(): + my_op() + + def _store_run_events(run_id, job_name=None): + events, _ = _synthesize_events(_ops, run_id=run_id, job_name=job_name) + for event in events: + storage.store_event(event) + + # store events for three runs + [run_id_1, run_id_2, run_id_3] = [ + make_new_run_id(), + make_new_run_id(), + make_new_run_id(), + ] + + with create_and_delete_test_runs(instance, [run_id_1, run_id_2, run_id_3]): + _store_run_events(run_id_1, "job_one") + _store_run_events(run_id_2, "job_two") + _store_run_events(run_id_3, "job_three") + + result = storage.fetch_run_status_changes( + DagsterEventType.RUN_SUCCESS, + limit=100, + ) + assert [r.event_log_entry.run_id for r in result.records] == [ + run_id_3, + run_id_2, + run_id_1, + ] + result = storage.fetch_run_status_changes( + RunStatusChangeRecordsFilter( + DagsterEventType.RUN_SUCCESS, + job_names=["job_one"], + ), + limit=100, + ) + assert [r.event_log_entry.run_id for r in result.records] == [run_id_1] + result = storage.fetch_run_status_changes( + RunStatusChangeRecordsFilter( + DagsterEventType.RUN_SUCCESS, + job_names=["job_one", "job_two"], + ), + limit=100, + ) + assert [r.event_log_entry.run_id for r in result.records] == [run_id_2, run_id_1] + def test_get_event_records_sqlite(self, storage, instance): if not self.is_sqlite(storage): pytest.skip() @@ -5220,6 +5282,19 @@ def test_asset_checks( assert checks[0].run_id == run_id_1 assert checks[0].event assert checks[0].event.dagster_event_type == DagsterEventType.ASSET_CHECK_EVALUATION_PLANNED + checks_filtered = storage.get_asset_check_execution_history( + check_key_1, limit=10, status={AssetCheckExecutionRecordStatus.PLANNED} + ) + assert len(checks_filtered) == 1 + checks_filtered_2 = storage.get_asset_check_execution_history( + check_key_1, + limit=10, + status={ + AssetCheckExecutionRecordStatus.SUCCEEDED, + AssetCheckExecutionRecordStatus.FAILED, + }, + ) + assert len(checks_filtered_2) == 0 latest_checks = storage.get_latest_asset_check_execution_by_key([check_key_1, check_key_2]) assert len(latest_checks) == 1 @@ -5259,6 +5334,19 @@ def test_asset_checks( check_data = checks[0].event.dagster_event.asset_check_evaluation_data assert check_data.target_materialization_data assert check_data.target_materialization_data.storage_id == 42 + filtered_checks = storage.get_asset_check_execution_history( + check_key_1, limit=10, status={AssetCheckExecutionRecordStatus.SUCCEEDED} + ) + assert len(filtered_checks) == 1 + filtered_checks_2 = storage.get_asset_check_execution_history( + check_key_1, + limit=10, + status={ + AssetCheckExecutionRecordStatus.FAILED, + AssetCheckExecutionRecordStatus.PLANNED, + }, + ) + assert len(filtered_checks_2) == 0 latest_checks = storage.get_latest_asset_check_execution_by_key([check_key_1, check_key_2]) assert len(latest_checks) == 1 @@ -5556,6 +5644,11 @@ def test_asset_check_summary_record( == AssetCheckExecutionRecordStatus.SUCCEEDED ) assert check_1_summary_record.last_check_execution_record.run_id == run_id_0 + assert ( + check_1_summary_record.last_completed_check_execution_record + == check_1_summary_record.last_check_execution_record + ) + assert check_1_summary_record.last_completed_run_id == run_id_0 check_2_summary_record = summary_records[check_key_2] assert check_2_summary_record.last_check_execution_record @@ -5623,6 +5716,7 @@ def test_asset_check_summary_record( ) # Check that the summary record for check_key_1 has been updated + old_check_1_summary_record = check_1_summary_record records = 
storage.get_asset_check_summary_records(asset_check_keys=[check_key_1]) assert len(records) == 1 check_1_summary_record = records[check_key_1] @@ -5632,6 +5726,12 @@ def test_asset_check_summary_record( == AssetCheckExecutionRecordStatus.PLANNED ) assert check_1_summary_record.last_check_execution_record.run_id == run_id_1 + # The latest completed data should not be updated. + assert ( + check_1_summary_record.last_completed_check_execution_record + == old_check_1_summary_record.last_check_execution_record + ) + assert check_1_summary_record.last_completed_run_id == run_id_0 def test_large_asset_metadata( self, diff --git a/python_modules/dagster/setup.py b/python_modules/dagster/setup.py index 62c5322540faf..5b0634b8ca030 100644 --- a/python_modules/dagster/setup.py +++ b/python_modules/dagster/setup.py @@ -83,7 +83,7 @@ def get_version() -> str: # core (not explicitly expressed atm) # pin around issues in specific versions of alembic that broke our migrations "alembic>=1.2.1,!=1.6.3,!=1.7.0,!=1.11.0", - "croniter>=0.3.34,<4", + "croniter>=0.3.34,!=4.0.0,<6", f"grpcio>={GRPC_VERSION_FLOOR}", f"grpcio-health-checking>={GRPC_VERSION_FLOOR}", "packaging>=20.9", diff --git a/python_modules/libraries/dagster-airbyte/dagster_airbyte/resources.py b/python_modules/libraries/dagster-airbyte/dagster_airbyte/resources.py index 0625b7c1073b0..d17f975d3586a 100644 --- a/python_modules/libraries/dagster-airbyte/dagster_airbyte/resources.py +++ b/python_modules/libraries/dagster-airbyte/dagster_airbyte/resources.py @@ -1018,7 +1018,7 @@ class AirbyteCloudWorkspace(ConfigurableResource): description="Time (in seconds) after which the requests to Airbyte are declared timed out.", ) - _client: AirbyteCloudClient = PrivateAttr(default=None) + _client: AirbyteCloudClient = PrivateAttr(default=None) # type: ignore @cached_method def get_client(self) -> AirbyteCloudClient: diff --git a/python_modules/libraries/dagster-aws/dagster_aws/ecs/__init__.py b/python_modules/libraries/dagster-aws/dagster_aws/ecs/__init__.py index 1c6711d03db3e..bb36e1a1c554f 100644 --- a/python_modules/libraries/dagster-aws/dagster_aws/ecs/__init__.py +++ b/python_modules/libraries/dagster-aws/dagster_aws/ecs/__init__.py @@ -1,2 +1,3 @@ +from dagster_aws.ecs.executor import ecs_executor as ecs_executor from dagster_aws.ecs.launcher import EcsRunLauncher as EcsRunLauncher from dagster_aws.ecs.tasks import EcsEventualConsistencyTimeout as EcsEventualConsistencyTimeout diff --git a/python_modules/libraries/dagster-aws/dagster_aws/ecs/executor.py b/python_modules/libraries/dagster-aws/dagster_aws/ecs/executor.py new file mode 100644 index 0000000000000..1dc2b41761b0f --- /dev/null +++ b/python_modules/libraries/dagster-aws/dagster_aws/ecs/executor.py @@ -0,0 +1,446 @@ +import json +import os +from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, cast + +import boto3 +from dagster import ( + DagsterInvariantViolationError, + DagsterRun, + Field, + IntSource, + Permissive, + _check as check, + executor, +) +from dagster._annotations import experimental +from dagster._core.definitions.executor_definition import multiple_process_executor_requirements +from dagster._core.definitions.metadata import MetadataValue +from dagster._core.events import DagsterEvent, EngineEventData +from dagster._core.execution.retries import RetryMode, get_retries_config +from dagster._core.execution.tags import get_tag_concurrency_limits_config +from dagster._core.executor.base import Executor +from dagster._core.executor.init import 
InitExecutorContext +from dagster._core.executor.step_delegating import ( + CheckStepHealthResult, + StepDelegatingExecutor, + StepHandler, + StepHandlerContext, +) +from dagster._utils.backoff import backoff +from dagster._utils.merger import deep_merge_dicts + +from dagster_aws.ecs.container_context import EcsContainerContext +from dagster_aws.ecs.launcher import STOPPED_STATUSES, EcsRunLauncher +from dagster_aws.ecs.tasks import ( + get_current_ecs_task, + get_current_ecs_task_metadata, + get_task_kwargs_from_current_task, +) +from dagster_aws.ecs.utils import RetryableEcsException, run_ecs_task + +DEFAULT_STEP_TASK_RETRIES = "5" + + +_ECS_EXECUTOR_CONFIG_SCHEMA = { + "run_task_kwargs": Field( + Permissive({}), + is_required=False, + description=( + "Additional arguments to which can be set to the boto3 run_task call. Will override values inherited from the ECS run launcher." + " https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ecs.html#ECS.Client.run_task" + " for the available parameters." + ), + ), + "cpu": Field(IntSource, is_required=False), + "memory": Field(IntSource, is_required=False), + "ephemeral_storage": Field(IntSource, is_required=False), + "task_overrides": Field( + Permissive({}), + is_required=False, + ), + "retries": get_retries_config(), + "max_concurrent": Field( + IntSource, + is_required=False, + description=( + "Limit on the number of tasks that will run concurrently within the scope " + "of a Dagster run. Note that this limit is per run, not global." + ), + ), + "tag_concurrency_limits": get_tag_concurrency_limits_config(), +} + + +@executor( + name="ecs", + config_schema=_ECS_EXECUTOR_CONFIG_SCHEMA, + requirements=multiple_process_executor_requirements(), +) +@experimental +def ecs_executor(init_context: InitExecutorContext) -> Executor: + """Executor which launches steps as ECS tasks. + + To use the `ecs_executor`, set it as the `executor_def` when defining a job: + + .. literalinclude:: ../../../../../../python_modules/libraries/dagster-aws/dagster_aws_tests/ecs_tests/launcher_tests/executor_tests/test_example_executor_mode_def.py + :start-after: start_marker + :end-before: end_marker + :language: python + + Then you can configure the executor with run config as follows: + + .. code-block:: YAML + + execution: + config: + cpu: 1024 + memory: 2048 + ephemeral_storage: 10 + task_overrides: + containerOverrides: + - name: run + environment: + - name: MY_ENV_VAR + value: "my_value" + + `max_concurrent` limits the number of ECS tasks that will execute concurrently for one run. By default + there is no limit- it will maximally parallel as allowed by the DAG. Note that this is not a + global limit. + + Configuration set on the ECS tasks created by the `ECSRunLauncher` will also be + set on the tasks created by the `ecs_executor`. + + Configuration set using `tags` on a `@job` will only apply to the `run` level. For configuration + to apply at each `step` it must be set using `tags` for each `@op`. 
+ """ + run_launcher = init_context.instance.run_launcher + + check.invariant( + isinstance(run_launcher, EcsRunLauncher), + "Using the ecs_executor currently requires that the run be launched in an ECS task via the EcsRunLauncher.", + ) + + exc_cfg = init_context.executor_config + + return StepDelegatingExecutor( + EcsStepHandler( + run_launcher=run_launcher, # type: ignore + run_task_kwargs=exc_cfg.get("run_task_kwargs"), # type: ignore + cpu=exc_cfg.get("cpu"), # type: ignore + memory=exc_cfg.get("memory"), # type: ignore + ephemeral_storage=exc_cfg.get("ephemeral_storage"), # type: ignore + task_overrides=exc_cfg.get("task_overrides"), # type:ignore + ), + retries=RetryMode.from_config(exc_cfg["retries"]), # type: ignore + max_concurrent=check.opt_int_elem(exc_cfg, "max_concurrent"), + tag_concurrency_limits=check.opt_list_elem(exc_cfg, "tag_concurrency_limits"), + should_verify_step=True, + ) + + +@experimental +class EcsStepHandler(StepHandler): + @property + def name(self): + return "EcsStepHandler" + + def __init__( + self, + run_launcher: EcsRunLauncher, + run_task_kwargs: Optional[Mapping[str, Any]], + cpu: Optional[int], + memory: Optional[int], + ephemeral_storage: Optional[int], + task_overrides: Optional[Mapping[str, Any]], + ): + super().__init__() + + run_task_kwargs = run_task_kwargs or {} + + self.ecs = boto3.client("ecs") + self.ec2 = boto3.resource("ec2") + + # confusingly, run_task expects cpu and memory value as strings + self._cpu = str(cpu) if cpu else None + self._memory = str(memory) if memory else None + + self._ephemeral_storage = ephemeral_storage + self._task_overrides = check.opt_mapping_param(task_overrides, "task_overrides") + + current_task_metadata = get_current_ecs_task_metadata() + current_task = get_current_ecs_task( + self.ecs, current_task_metadata.task_arn, current_task_metadata.cluster + ) + + if run_launcher.use_current_ecs_task_config: + current_task_kwargs = get_task_kwargs_from_current_task( + self.ec2, + current_task_metadata.cluster, + current_task, + ) + else: + current_task_kwargs = {} + + run_launcher_kwargs = {**current_task_kwargs, **run_launcher.run_task_kwargs} + + self._cluster_arn = current_task["clusterArn"] + self._task_definition_arn = current_task["taskDefinitionArn"] + + self._run_task_kwargs = { + "taskDefinition": current_task["taskDefinitionArn"], + **run_launcher_kwargs, + **run_task_kwargs, + } + + # TODO: change launch_step to return task ARN + # this will be a breaking change so we need to wait for a minor release + # to do this + self._launched_tasks = {} + + def _get_run_task_kwargs( + self, + run: DagsterRun, + args: Sequence[str], + step_key: str, + step_tags: Mapping[str, str], + step_handler_context: StepHandlerContext, + container_context: EcsContainerContext, + ): + run_launcher = check.inst( + step_handler_context.instance.run_launcher, + EcsRunLauncher, + "ECS executor can only be enabled with the ECS run launcher", + ) + + run_task_kwargs = self._run_task_kwargs + + kwargs_from_tags = step_tags.get("ecs/run_task_kwargs") + if kwargs_from_tags: + run_task_kwargs = {**run_task_kwargs, **json.loads(kwargs_from_tags)} + + # convert tags to a dictionary for easy value overriding + tags = { + **{tag["key"]: tag["value"] for tag in run_task_kwargs.get("tags", [])}, + **{ + tag["key"]: tag["value"] + for tag in run_launcher.build_ecs_tags_for_run_task(run, container_context) + }, + **step_handler_context.dagster_run.dagster_execution_info, + "dagster/step-key": step_key, + "dagster/step-id": 
self._get_step_id(step_handler_context), + } + + run_task_kwargs["tags"] = [ + { + "key": key, + "value": value, + } + for key, value in tags.items() + ] + + task_overrides = self._get_task_overrides(step_tags) or {} + + task_overrides["containerOverrides"] = task_overrides.get("containerOverrides", []) + + # container name has to match since we are assuming we are using the same task + executor_container_name = run_launcher.get_container_name(container_context) + executor_env_vars = [ + {"name": env["name"], "value": env["value"]} + for env in step_handler_context.execute_step_args.get_command_env() + ] + + # inject Executor command and env vars into the container overrides + # if they are defined + # otherwise create a new container overrides for the executor container + for container_overrides in task_overrides["containerOverrides"]: + # try to update existing container overrides for the executor container + if container_overrides["name"] == executor_container_name: + if "command" in container_overrides and container_overrides["command"] != args: + raise DagsterInvariantViolationError( + f"The 'command' field for {executor_container_name} container is not allowed in the 'containerOverrides' field of the task overrides." + ) + + # update environment variables & command + container_overrides["command"] = args + container_overrides["environment"] = ( + container_overrides.get("environment", []) + executor_env_vars + ) + break + # if no existing container overrides for the executor container, add new container overrides + else: + task_overrides["containerOverrides"].append( + { + "name": executor_container_name, + "command": args, + "environment": executor_env_vars, + } + ) + + run_task_kwargs["overrides"] = deep_merge_dicts( + run_task_kwargs.get("overrides", {}), task_overrides + ) + + return run_task_kwargs + + def _get_task_overrides(self, step_tags: Mapping[str, str]) -> Dict[str, Any]: + overrides = {**self._task_overrides} + + cpu = step_tags.get("ecs/cpu", self._cpu) + memory = step_tags.get("ecs/memory", self._memory) + + if cpu: + overrides["cpu"] = cpu + if memory: + overrides["memory"] = memory + + ephemeral_storage = step_tags.get("ecs/ephemeral_storage", self._ephemeral_storage) + + if ephemeral_storage: + overrides["ephemeralStorage"] = {"sizeInGiB": int(ephemeral_storage)} + + if tag_overrides := step_tags.get("ecs/task_overrides"): + overrides = deep_merge_dicts(overrides, json.loads(tag_overrides)) + + return overrides + + def _get_step_id(self, step_handler_context: StepHandlerContext): + """Step ID is used to identify the ECS task in the ECS cluster. + It is unique to specific step being executed and takes into account op-level retries. + It's used as a workaround to avoid having to return task ARN from launch_step. 
+ """ + step_key = self._get_step_key(step_handler_context) + + if step_handler_context.execute_step_args.known_state: + retry_count = step_handler_context.execute_step_args.known_state.get_retry_state().get_attempt_count( + step_key + ) + else: + retry_count = 0 + + return "%s-%d" % (step_key, retry_count) + + def _get_step_key(self, step_handler_context: StepHandlerContext) -> str: + step_keys_to_execute = cast( + List[str], step_handler_context.execute_step_args.step_keys_to_execute + ) + assert len(step_keys_to_execute) == 1, "Launching multiple steps is not currently supported" + return step_keys_to_execute[0] + + def _get_container_context( + self, step_handler_context: StepHandlerContext + ) -> EcsContainerContext: + return EcsContainerContext.create_for_run( + step_handler_context.dagster_run, + cast(EcsRunLauncher, step_handler_context.instance.run_launcher), + ) + + def _run_task(self, **run_task_kwargs): + return run_ecs_task(self.ecs, run_task_kwargs) + + def launch_step(self, step_handler_context: StepHandlerContext) -> Iterator[DagsterEvent]: + step_key = self._get_step_key(step_handler_context) + + step_tags = step_handler_context.step_tags[step_key] + + container_context = self._get_container_context(step_handler_context) + + run = step_handler_context.dagster_run + + args = step_handler_context.execute_step_args.get_command_args( + skip_serialized_namedtuple=True + ) + + run_task_kwargs = self._get_run_task_kwargs( + run, + args, + step_key, + step_tags, + step_handler_context=step_handler_context, + container_context=container_context, + ) + + task = backoff( + self._run_task, + retry_on=(RetryableEcsException,), + kwargs=run_task_kwargs, + max_retries=int( + os.getenv("STEP_TASK_RETRIES", DEFAULT_STEP_TASK_RETRIES), + ), + ) + + yield DagsterEvent.step_worker_starting( + step_handler_context.get_step_context(step_key), + message=f'Executing step "{step_key}" in ECS task.', + metadata={ + "Task ARN": MetadataValue.text(task["taskArn"]), + }, + ) + + step_id = self._get_step_id(step_handler_context) + + self._launched_tasks[step_id] = task["taskArn"] + + def check_step_health(self, step_handler_context: StepHandlerContext) -> CheckStepHealthResult: + step_key = self._get_step_key(step_handler_context) + step_id = self._get_step_id(step_handler_context) + + try: + task_arn = self._launched_tasks[step_id] + except KeyError: + return CheckStepHealthResult.unhealthy( + reason=f"Task ARN for step {step_key} could not be found in executor's task map. This is likely a bug." + ) + + cluster_arn = self._cluster_arn + + tasks = self.ecs.describe_tasks(tasks=[task_arn], cluster=cluster_arn).get("tasks") + + if not tasks: + return CheckStepHealthResult.unhealthy( + reason=f"Task {task_arn} for step {step_key} could not be found." 
+ ) + + t = tasks[0] + if t.get("lastStatus") in STOPPED_STATUSES: + failed_containers = [] + for c in t.get("containers"): + if c.get("exitCode") != 0: + failed_containers.append(c) + if len(failed_containers) > 0: + cluster_failure_info = ( + f"Task {t.get('taskArn')} failed.\n" + f"Stop code: {t.get('stopCode')}.\n" + f"Stop reason: {t.get('stoppedReason')}.\n" + ) + for c in failed_containers: + exit_code = c.get("exitCode") + exit_code_msg = f" - exit code {exit_code}" if exit_code is not None else "" + cluster_failure_info += f"Container '{c.get('name')}' failed{exit_code_msg}.\n" + + return CheckStepHealthResult.unhealthy(reason=cluster_failure_info) + + return CheckStepHealthResult.healthy() + + def terminate_step( + self, + step_handler_context: StepHandlerContext, + ) -> None: + step_id = self._get_step_id(step_handler_context) + step_key = self._get_step_key(step_handler_context) + + try: + task_arn = self._launched_tasks[step_id] + except KeyError: + raise DagsterInvariantViolationError( + f"Task ARN for step {step_key} could not be found in executor's task map. This is likely a bug." + ) + + cluster_arn = self._cluster_arn + + DagsterEvent.engine_event( + step_handler_context.get_step_context(step_key), + message=f"Stopping task {task_arn} for step", + event_specific_data=EngineEventData(), + ) + + self.ecs.stop_task(task=task_arn, cluster=cluster_arn) diff --git a/python_modules/libraries/dagster-aws/dagster_aws/ecs/launcher.py b/python_modules/libraries/dagster-aws/dagster_aws/ecs/launcher.py index 11ad43735a8ee..4d60f6c771848 100644 --- a/python_modules/libraries/dagster-aws/dagster_aws/ecs/launcher.py +++ b/python_modules/libraries/dagster-aws/dagster_aws/ecs/launcher.py @@ -48,7 +48,13 @@ get_task_definition_dict_from_current_task, get_task_kwargs_from_current_task, ) -from dagster_aws.ecs.utils import get_task_definition_family, get_task_logs, task_definitions_match +from dagster_aws.ecs.utils import ( + RetryableEcsException, + get_task_definition_family, + get_task_logs, + run_ecs_task, + task_definitions_match, +) from dagster_aws.secretsmanager import get_secrets_from_arns Tags = namedtuple("Tags", ["arn", "cluster", "cpu", "memory"]) @@ -74,9 +80,6 @@ DEFAULT_RUN_TASK_RETRIES = 5 -class RetryableEcsException(Exception): ... - - class EcsRunLauncher(RunLauncher[T_DagsterInstance], ConfigurableClass): """RunLauncher that starts a task in ECS for each Dagster job run.""" @@ -429,39 +432,17 @@ def _get_run_tags(self, run_id: str) -> Tags: def _get_command_args(self, run_args: ExecuteRunArgs, context: LaunchRunContext): return run_args.get_command_args() - def _get_image_for_run(self, context: LaunchRunContext) -> Optional[str]: - job_origin = check.not_none(context.job_code_origin) - return job_origin.repository_origin.container_image + def get_image_for_run(self, context: LaunchRunContext) -> Optional[str]: + """Child classes can override this method to determine the image to use for a run. This is considered a public API.""" + run = context.dagster_run + return ( + run.job_code_origin.repository_origin.container_image + if run.job_code_origin is not None + else None + ) def _run_task(self, **run_task_kwargs): - response = self.ecs.run_task(**run_task_kwargs) - - tasks = response["tasks"] - - if not tasks: - failures = response["failures"] - failure_messages = [] - for failure in failures: - arn = failure.get("arn") - reason = failure.get("reason") - detail = failure.get("detail") - - failure_message = ( - "Task" - + (f" {arn}" if arn else "") - + " failed." 
- + (f" Failure reason: {reason}" if reason else "") - + (f" Failure details: {detail}" if detail else "") - ) - failure_messages.append(failure_message) - - failure_message = "\n".join(failure_messages) if failure_messages else "Task failed." - - if "Capacity is unavailable at this time" in failure_message: - raise RetryableEcsException(failure_message) - - raise Exception(failure_message) - return tasks[0] + return run_ecs_task(self.ecs, run_task_kwargs) def launch_run(self, context: LaunchRunContext) -> None: """Launch a run in an ECS task.""" @@ -487,7 +468,7 @@ def launch_run(self, context: LaunchRunContext) -> None: instance_ref=self._instance.get_ref(), ) command = self._get_command_args(args, context) - image = self._get_image_for_run(context) + image = self.get_image_for_run(context) run_task_kwargs = self._run_task_kwargs(run, image, container_context) @@ -499,7 +480,7 @@ def launch_run(self, context: LaunchRunContext) -> None: container_overrides: List[Dict[str, Any]] = [ { - "name": self._get_container_name(container_context), + "name": self.get_container_name(container_context), "command": command, # containerOverrides expects cpu/memory as integers **{k: int(v) for k, v in cpu_and_memory_overrides.items()}, @@ -644,7 +625,7 @@ def _get_current_task(self): def _get_run_task_definition_family(self, run: DagsterRun) -> str: return get_task_definition_family("run", check.not_none(run.remote_job_origin)) - def _get_container_name(self, container_context: EcsContainerContext) -> str: + def get_container_name(self, container_context: EcsContainerContext) -> str: return container_context.container_name or self.container_name def _run_task_kwargs( @@ -675,7 +656,7 @@ def _run_task_kwargs( task_definition_config = DagsterEcsTaskDefinitionConfig( family, image, - self._get_container_name(container_context), + self.get_container_name(container_context), command=None, log_configuration=( { @@ -715,7 +696,7 @@ def _run_task_kwargs( family, self._get_current_task(), image, - self._get_container_name(container_context), + self.get_container_name(container_context), environment=environment, secrets=secrets if secrets else {}, include_sidecars=self.include_sidecars, @@ -733,10 +714,10 @@ def _run_task_kwargs( task_definition_config = DagsterEcsTaskDefinitionConfig.from_task_definition_dict( task_definition_dict, - self._get_container_name(container_context), + self.get_container_name(container_context), ) - container_name = self._get_container_name(container_context) + container_name = self.get_container_name(container_context) backoff( self._reuse_or_register_task_definition, @@ -897,7 +878,7 @@ def check_run_worker_health(self, run: DagsterRun): logs_client=self.logs, cluster=tags.cluster, task_arn=tags.arn, - container_name=self._get_container_name(container_context), + container_name=self.get_container_name(container_context), ) except: logging.exception(f"Error trying to get logs for failed task {tags.arn}") diff --git a/python_modules/libraries/dagster-aws/dagster_aws/ecs/utils.py b/python_modules/libraries/dagster-aws/dagster_aws/ecs/utils.py index ec3d9edade381..63627be44a450 100644 --- a/python_modules/libraries/dagster-aws/dagster_aws/ecs/utils.py +++ b/python_modules/libraries/dagster-aws/dagster_aws/ecs/utils.py @@ -19,6 +19,40 @@ def _get_family_hash(name): return f"{name[:55]}_{name_hash}" +class RetryableEcsException(Exception): ... 
+ + +def run_ecs_task(ecs, run_task_kwargs) -> Mapping[str, Any]: + response = ecs.run_task(**run_task_kwargs) + + tasks = response["tasks"] + + if not tasks: + failures = response["failures"] + failure_messages = [] + for failure in failures: + arn = failure.get("arn") + reason = failure.get("reason") + detail = failure.get("detail") + + failure_message = ( + "Task" + + (f" {arn}" if arn else "") + + " failed." + + (f" Failure reason: {reason}" if reason else "") + + (f" Failure details: {detail}" if detail else "") + ) + failure_messages.append(failure_message) + + failure_message = "\n".join(failure_messages) if failure_messages else "Task failed." + + if "Capacity is unavailable at this time" in failure_message: + raise RetryableEcsException(failure_message) + + raise Exception(failure_message) + return tasks[0] + + def get_task_definition_family( prefix: str, job_origin: RemoteJobOrigin, diff --git a/python_modules/libraries/dagster-aws/dagster_aws/pipes/message_readers.py b/python_modules/libraries/dagster-aws/dagster_aws/pipes/message_readers.py index b81f57f4d6894..7f857d94bce10 100644 --- a/python_modules/libraries/dagster-aws/dagster_aws/pipes/message_readers.py +++ b/python_modules/libraries/dagster-aws/dagster_aws/pipes/message_readers.py @@ -37,6 +37,7 @@ extract_message_or_forward_to_stdout, forward_only_logs_to_file, ) +from dagster._utils.backoff import backoff from dagster_pipes import PipesBlobStoreMessageWriter, PipesDefaultMessageWriter if TYPE_CHECKING: @@ -216,11 +217,41 @@ def no_messages_debug_text(self) -> str: ) +# Number of retries to attempt getting cloudwatch logs when faced with a throttling exception. +DEFAULT_CLOUDWATCH_LOGS_MAX_RETRIES = 10 + + +# Custom backoff delay_generator for get_log_events which adds some jitter +def get_log_events_delay_generator() -> Iterator[float]: + i = 0.5 + while True: + yield i + i *= 2 + i += random.uniform(0, 1) + + +def get_log_events( + client: "CloudWatchLogsClient", + max_retries: Optional[int] = DEFAULT_CLOUDWATCH_LOGS_MAX_RETRIES, + **log_params, +): + max_retries = max_retries or DEFAULT_CLOUDWATCH_LOGS_MAX_RETRIES + + return backoff( + fn=client.get_log_events, + kwargs=log_params, + retry_on=(client.exceptions.ThrottlingException,), + max_retries=max_retries, + delay_generator=get_log_events_delay_generator(), + ) + + def tail_cloudwatch_events( client: "CloudWatchLogsClient", log_group: str, log_stream: str, start_time: Optional[int] = None, + max_retries: Optional[int] = DEFAULT_CLOUDWATCH_LOGS_MAX_RETRIES, ) -> Generator[List["OutputLogEventTypeDef"], None, None]: """Yields events from a CloudWatch log stream.""" params: Dict[str, Any] = { @@ -231,7 +262,7 @@ def tail_cloudwatch_events( if start_time is not None: params["startTime"] = start_time - response = client.get_log_events(**params) + response = get_log_events(client=client, max_retries=max_retries, **params) while True: events = response.get("events") @@ -241,7 +272,7 @@ def tail_cloudwatch_events( params["nextToken"] = response["nextForwardToken"] - response = client.get_log_events(**params) + response = get_log_events(client=client, max_retries=max_retries, **params) @experimental @@ -254,6 +285,7 @@ def __init__( target_stream: Optional[IO[str]] = None, start_time: Optional[int] = None, debug_info: Optional[str] = None, + max_retries: Optional[int] = DEFAULT_CLOUDWATCH_LOGS_MAX_RETRIES, ): self.client = client or boto3.client("logs") self.log_group = log_group @@ -262,6 +294,7 @@ def __init__( self.thread = None self.start_time = start_time 
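The `get_log_events` helper added earlier in this file wraps `client.get_log_events` in `dagster._utils.backoff` with a jittered exponential delay, so CloudWatch throttling no longer aborts log tailing. A usage sketch under assumed values (the log group, stream name, and retry count are placeholders):

```python
import boto3

from dagster_aws.pipes.message_readers import get_log_events

logs_client = boto3.client("logs")

# Retries ThrottlingException up to 5 times with exponential backoff plus jitter;
# remaining keyword arguments are forwarded to the boto3 get_log_events call.
response = get_log_events(
    client=logs_client,
    max_retries=5,
    logGroupName="/dagster/pipes",
    logStreamName="example-stream",
    startFromHead=True,
)
for event in response.get("events", []):
    print(event["message"])
```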
self._debug_info = debug_info + self.max_retries = max_retries @property def debug_info(self) -> Optional[str]: @@ -274,11 +307,15 @@ def target_is_readable(self, params: PipesParams) -> bool: if log_group is not None and log_stream is not None: # check if the stream actually exists try: - self.client.describe_log_streams( + resp = self.client.describe_log_streams( logGroupName=log_group, logStreamNamePrefix=log_stream, ) - return True + + if resp.get("logStreams", []): + return True + else: + return False except self.client.exceptions.ResourceNotFoundException: return False else: @@ -301,7 +338,7 @@ def _start(self, params: PipesParams, is_session_closed: Event) -> None: start_time = cast(int, self.start_time or params.get("start_time")) for events in tail_cloudwatch_events( - self.client, log_group, log_stream, start_time=start_time + self.client, log_group, log_stream, start_time=start_time, max_retries=self.max_retries ): for event in events: for line in event.get("message", "").splitlines(): @@ -328,6 +365,7 @@ def __init__( log_group: Optional[str] = None, log_stream: Optional[str] = None, log_readers: Optional[Sequence[PipesLogReader]] = None, + max_retries: Optional[int] = DEFAULT_CLOUDWATCH_LOGS_MAX_RETRIES, ): """Args: client (boto3.client): boto3 CloudWatch client. @@ -335,6 +373,7 @@ def __init__( self.client: "CloudWatchLogsClient" = client or boto3.client("logs") self.log_group = log_group self.log_stream = log_stream + self.max_retries = max_retries self.start_time = datetime.now() @@ -357,11 +396,15 @@ def messages_are_readable(self, params: PipesParams) -> bool: if self.log_group is not None and self.log_stream is not None: # check if the stream actually exists try: - self.client.describe_log_streams( + resp = self.client.describe_log_streams( logGroupName=self.log_group, logStreamNamePrefix=self.log_stream, ) - return True + + if resp.get("logStreams", []): + return True + else: + return False except self.client.exceptions.ResourceNotFoundException: return False else: @@ -379,7 +422,7 @@ def download_messages( if cursor is not None: params["nextToken"] = cursor - response = self.client.get_log_events(**params) + response = get_log_events(client=self.client, max_retries=self.max_retries, **params) events = response.get("events") diff --git a/python_modules/libraries/dagster-aws/dagster_aws_tests/ecs_tests/launcher_tests/executor_tests/test_example_executor_mode_def.py b/python_modules/libraries/dagster-aws/dagster_aws_tests/ecs_tests/launcher_tests/executor_tests/test_example_executor_mode_def.py new file mode 100644 index 0000000000000..12aa4f15f3cb6 --- /dev/null +++ b/python_modules/libraries/dagster-aws/dagster_aws_tests/ecs_tests/launcher_tests/executor_tests/test_example_executor_mode_def.py @@ -0,0 +1,26 @@ +# ruff: isort: skip_file +# fmt: off +# start_marker +from dagster_aws.ecs import ecs_executor + +from dagster import job, op + + +@op( + tags={"ecs/cpu": "256", "ecs/memory": "512"}, +) +def ecs_op(): + pass + + +@job(executor_def=ecs_executor) +def ecs_job(): + ecs_op() + + +# end_marker +# fmt: on + + +def test_mode(): + assert ecs_job diff --git a/python_modules/libraries/dagster-aws/dagster_aws_tests/ecs_tests/launcher_tests/executor_tests/test_executor.py b/python_modules/libraries/dagster-aws/dagster_aws_tests/ecs_tests/launcher_tests/executor_tests/test_executor.py new file mode 100644 index 0000000000000..87e25aeadd598 --- /dev/null +++ b/python_modules/libraries/dagster-aws/dagster_aws_tests/ecs_tests/launcher_tests/executor_tests/test_executor.py @@ 
-0,0 +1,245 @@ +from typing import Callable, ContextManager + +from dagster import job, op, repository +from dagster._config import process_config, resolve_to_config_type +from dagster._core.definitions.reconstruct import reconstructable +from dagster._core.execution.api import create_execution_plan +from dagster._core.execution.context.system import PlanData, PlanOrchestrationContext +from dagster._core.execution.context_creation_job import create_context_free_log_manager +from dagster._core.execution.retries import RetryMode +from dagster._core.executor.init import InitExecutorContext +from dagster._core.executor.step_delegating.step_handler.base import StepHandlerContext +from dagster._core.instance import DagsterInstance +from dagster._core.remote_representation.handle import RepositoryHandle +from dagster._core.storage.fs_io_manager import fs_io_manager +from dagster._core.test_utils import create_run_for_test, in_process_test_workspace +from dagster._core.types.loadable_target_origin import LoadableTargetOrigin +from dagster._grpc.types import ExecuteStepArgs +from dagster._utils.hosted_user_process import remote_job_from_recon_job + +from dagster_aws.ecs.executor import _ECS_EXECUTOR_CONFIG_SCHEMA, ecs_executor + + +@job( + executor_def=ecs_executor, + resource_defs={"io_manager": fs_io_manager}, +) +def bar(): + @op( + tags={ + "ecs/cpu": "1024", + "ecs/memory": "512", + } + ) + def foo(): + return 1 + + foo() + + +@repository +def bar_repo(): + return [bar] + + +def _get_executor(instance, job_def, executor_config=None): + process_result = process_config( + resolve_to_config_type(_ECS_EXECUTOR_CONFIG_SCHEMA), executor_config or {} + ) + if not process_result.success: + raise AssertionError(f"Process result errors: {process_result.errors}") + + return ecs_executor.executor_creation_fn( # type: ignore + InitExecutorContext( + job=job_def, + executor_def=ecs_executor, + executor_config=process_result.value, # type: ignore + instance=instance, + ) + ) + + +def _step_handler_context(job_def, dagster_run, instance, executor): + execution_plan = create_execution_plan(job_def) + log_manager = create_context_free_log_manager(instance, dagster_run) + + plan_context = PlanOrchestrationContext( + plan_data=PlanData( + job=job_def, + dagster_run=dagster_run, + instance=instance, + execution_plan=execution_plan, + raise_on_error=True, + retry_mode=RetryMode.DISABLED, + ), + log_manager=log_manager, + executor=executor, + output_capture=None, + ) + + execute_step_args = ExecuteStepArgs( + reconstructable(bar).get_python_origin(), + dagster_run.run_id, + ["foo"], + print_serialized_events=False, + ) + + return StepHandlerContext( + instance=instance, + plan_context=plan_context, + steps=execution_plan.steps, # type: ignore + execute_step_args=execute_step_args, + ) + + +def test_executor_init(instance_cm: Callable[..., ContextManager[DagsterInstance]]): + with instance_cm() as instance: + recon_job = reconstructable(bar) + loadable_target_origin = LoadableTargetOrigin(python_file=__file__, attribute="bar_repo") + + memory = 128 + cpu = 500 + env_var = {"key": "OVERRIDE_VAR", "value": "foo"} + executor = _get_executor( + instance, + reconstructable(bar), + { + "cpu": cpu, + "memory": memory, + "task_overrides": { + "containerOverrides": [ + { + "name": "run", + "environment": [env_var], + } + ], + }, + }, + ) + + with in_process_test_workspace( + instance, loadable_target_origin, container_image="testing/dagster" + ) as workspace: + location = 
workspace.get_code_location(workspace.code_location_names[0]) + repo_handle = RepositoryHandle.from_location( + repository_name="bar_repo", + code_location=location, + ) + fake_remote_job = remote_job_from_recon_job( + recon_job, + op_selection=None, + repository_handle=repo_handle, + ) + + run = create_run_for_test( + instance, + job_name="bar", + remote_job_origin=fake_remote_job.get_remote_origin(), + job_code_origin=recon_job.get_python_origin(), + ) + step_handler_context = _step_handler_context( + job_def=reconstructable(bar), + dagster_run=run, + instance=instance, + executor=executor, + ) + run_task_kwargs = executor._step_handler._get_run_task_kwargs( # type: ignore # noqa: SLF001 + run, + ["my-command"], + "asdasd", + {}, + step_handler_context, + executor._step_handler._get_container_context(step_handler_context), # type: ignore # noqa: SLF001 + ) + + assert run_task_kwargs["launchType"] == "FARGATE" # this comes from the Run Launcher + + overrides = run_task_kwargs["overrides"] + + assert overrides["cpu"] == str(cpu) + assert overrides["memory"] == str(memory) + + run_container_overrides = overrides["containerOverrides"][0] + + assert run_container_overrides["name"] == "run" + assert run_container_overrides["command"] == ["my-command"] + + assert env_var in run_container_overrides["environment"] + + +def test_executor_launch(instance_cm: Callable[..., ContextManager[DagsterInstance]]): + with instance_cm() as instance: + recon_job = reconstructable(bar) + loadable_target_origin = LoadableTargetOrigin(python_file=__file__, attribute="bar_repo") + + with in_process_test_workspace( + instance, loadable_target_origin, container_image="testing/dagster" + ) as workspace: + location = workspace.get_code_location(workspace.code_location_names[0]) + repo_handle = RepositoryHandle.from_location( + repository_name="bar_repo", + code_location=location, + ) + fake_remote_job = remote_job_from_recon_job( + recon_job, + op_selection=None, + repository_handle=repo_handle, + ) + + executor = _get_executor(instance, reconstructable(bar), {}) + run = create_run_for_test( + instance, + job_name="bar", + remote_job_origin=fake_remote_job.get_remote_origin(), + job_code_origin=recon_job.get_python_origin(), + ) + step_handler_context = _step_handler_context( + job_def=reconstructable(bar), + dagster_run=run, + instance=instance, + executor=executor, + ) + from unittest.mock import MagicMock + + executor._step_handler.ecs.run_task = MagicMock( # type: ignore # noqa: SLF001 + return_value={"tasks": [{"taskArn": "arn:123"}]} + ) + + next(iter(executor._step_handler.launch_step(step_handler_context))) # type: ignore # noqa: SLF001 + + run_task_kwargs = executor._step_handler.ecs.run_task.call_args[1] # type: ignore # noqa: SLF001 + + # resources should come from step tags + assert run_task_kwargs["overrides"]["cpu"] == "1024" + assert run_task_kwargs["overrides"]["memory"] == "512" + + tags = run_task_kwargs["tags"] + + assert { + "key": "dagster/run-id", + "value": run.run_id, + } in tags + + assert { + "key": "dagster/job", + "value": run.job_name, + } in tags + + assert { + "key": "dagster/step-key", + "value": "foo", + } in tags + + assert run_task_kwargs["overrides"]["containerOverrides"][0]["command"] == [ + "dagster", + "api", + "execute_step", + ] + + found_executor_args_var = False + for var in run_task_kwargs["overrides"]["containerOverrides"][0]["environment"]: + if var["name"] == "DAGSTER_COMPRESSED_EXECUTE_STEP_ARGS": + found_executor_args_var = True + break + + assert 
found_executor_args_var diff --git a/python_modules/libraries/dagster-components/dagster_components/__init__.py b/python_modules/libraries/dagster-components/dagster_components/__init__.py index f5463bf0d60f8..13b5b3d5758ad 100644 --- a/python_modules/libraries/dagster-components/dagster_components/__init__.py +++ b/python_modules/libraries/dagster-components/dagster_components/__init__.py @@ -1,7 +1,6 @@ -from dagster._core.libraries import DagsterLibraryRegistry - from dagster_components.core.component import ( Component as Component, + ComponentGenerateRequest as ComponentGenerateRequest, ComponentLoadContext as ComponentLoadContext, ComponentRegistry as ComponentRegistry, component as component, @@ -10,5 +9,3 @@ build_defs_from_toplevel_components_folder as build_defs_from_toplevel_components_folder, ) from dagster_components.version import __version__ as __version__ - -DagsterLibraryRegistry.register("dagster-components", __version__) diff --git a/python_modules/libraries/dagster-components/dagster_components/cli/__init__.py b/python_modules/libraries/dagster-components/dagster_components/cli/__init__.py index 025e67200f49c..bc123b3ddd2e0 100644 --- a/python_modules/libraries/dagster-components/dagster_components/cli/__init__.py +++ b/python_modules/libraries/dagster-components/dagster_components/cli/__init__.py @@ -3,6 +3,8 @@ from dagster_components.cli.generate import generate_cli from dagster_components.cli.list import list_cli +from dagster_components.core.component import BUILTIN_PUBLISHED_COMPONENT_ENTRY_POINT +from dagster_components.utils import CLI_BUILTIN_COMPONENT_LIB_KEY def create_dagster_components_cli(): @@ -15,9 +17,18 @@ def create_dagster_components_cli(): commands=commands, context_settings={"max_content_width": 120, "help_option_names": ["-h", "--help"]}, ) + @click.option( + "--builtin-component-lib", + type=str, + default=BUILTIN_PUBLISHED_COMPONENT_ENTRY_POINT, + help="Specify the builtin component library to load.", + ) @click.version_option(__version__, "--version", "-v") - def group(): + @click.pass_context + def group(ctx: click.Context, builtin_component_lib: str): """CLI tools for working with Dagster.""" + ctx.ensure_object(dict) + ctx.obj[CLI_BUILTIN_COMPONENT_LIB_KEY] = builtin_component_lib return group diff --git a/python_modules/libraries/dagster-components/dagster_components/cli/generate.py b/python_modules/libraries/dagster-components/dagster_components/cli/generate.py index 438d8ccb3b578..8405b5ec04e88 100644 --- a/python_modules/libraries/dagster-components/dagster_components/cli/generate.py +++ b/python_modules/libraries/dagster-components/dagster_components/cli/generate.py @@ -11,6 +11,7 @@ is_inside_code_location_project, ) from dagster_components.generate import generate_component_instance +from dagster_components.utils import CLI_BUILTIN_COMPONENT_LIB_KEY @click.group(name="generate") @@ -23,12 +24,15 @@ def generate_cli() -> None: @click.argument("component_name", type=str) @click.option("--json-params", type=str, default=None) @click.argument("extra_args", nargs=-1, type=str) +@click.pass_context def generate_component_command( + ctx: click.Context, component_type: str, component_name: str, json_params: Optional[str], extra_args: Tuple[str, ...], ) -> None: + builtin_component_lib = ctx.obj.get(CLI_BUILTIN_COMPONENT_LIB_KEY, False) if not is_inside_code_location_project(Path.cwd()): click.echo( click.style( @@ -38,7 +42,8 @@ def generate_component_command( sys.exit(1) context = CodeLocationProjectContext.from_path( - Path.cwd(), 
ComponentRegistry.from_entry_point_discovery() + Path.cwd(), + ComponentRegistry.from_entry_point_discovery(builtin_component_lib=builtin_component_lib), ) if not context.has_component_type(component_type): click.echo( diff --git a/python_modules/libraries/dagster-components/dagster_components/cli/list.py b/python_modules/libraries/dagster-components/dagster_components/cli/list.py index 9368a9fa1206a..28957aaa33aad 100644 --- a/python_modules/libraries/dagster-components/dagster_components/cli/list.py +++ b/python_modules/libraries/dagster-components/dagster_components/cli/list.py @@ -5,11 +5,12 @@ import click -from dagster_components.core.component import ComponentRegistry +from dagster_components.core.component import ComponentMetadata, ComponentRegistry from dagster_components.core.deployment import ( CodeLocationProjectContext, is_inside_code_location_project, ) +from dagster_components.utils import CLI_BUILTIN_COMPONENT_LIB_KEY @click.group(name="generate") @@ -18,8 +19,10 @@ def list_cli(): @list_cli.command(name="component-types") -def list_component_types_command() -> None: +@click.pass_context +def list_component_types_command(ctx: click.Context) -> None: """List registered Dagster components.""" + builtin_component_lib = ctx.obj.get(CLI_BUILTIN_COMPONENT_LIB_KEY, False) if not is_inside_code_location_project(Path.cwd()): click.echo( click.style( @@ -29,12 +32,15 @@ def list_component_types_command() -> None: sys.exit(1) context = CodeLocationProjectContext.from_path( - Path.cwd(), ComponentRegistry.from_entry_point_discovery() + Path.cwd(), + ComponentRegistry.from_entry_point_discovery(builtin_component_lib=builtin_component_lib), ) output: Dict[str, Any] = {} - for component_type in context.list_component_types(): - # package, name = component_type.rsplit(".", 1) - output[component_type] = { - "name": component_type, - } + for key, component_type in context.list_component_types(): + package, name = key.rsplit(".", 1) + output[key] = ComponentMetadata( + name=name, + package=package, + **component_type.get_metadata(), + ) click.echo(json.dumps(output)) diff --git a/python_modules/libraries/dagster-components/dagster_components/core/component.py b/python_modules/libraries/dagster-components/dagster_components/core/component.py index fe15ed559e48e..dc8ca5e0dab20 100644 --- a/python_modules/libraries/dagster-components/dagster_components/core/component.py +++ b/python_modules/libraries/dagster-components/dagster_components/core/component.py @@ -1,39 +1,102 @@ import copy +import dataclasses import importlib import importlib.metadata +import inspect import sys +import textwrap from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path from types import ModuleType -from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterable, Mapping, Optional, Sequence, Type +from typing import ( + Any, + ClassVar, + Dict, + Iterable, + Mapping, + Optional, + Sequence, + Type, + TypedDict, + TypeVar, +) from dagster import _check as check +from dagster._core.definitions.definitions_class import Definitions from dagster._core.errors import DagsterError -from dagster._utils import snakecase +from dagster._record import record +from dagster._utils import pushd, snakecase +from pydantic import TypeAdapter from typing_extensions import Self -if TYPE_CHECKING: - from dagster._core.definitions.definitions_class import Definitions +from dagster_components.core.component_rendering import TemplatedValueResolver, preprocess_value +from dagster_components.utils import 
ensure_dagster_components_tests_import class ComponentDeclNode: ... +@record +class ComponentGenerateRequest: + component_type_name: str + component_instance_root_path: Path + + class Component(ABC): name: ClassVar[Optional[str]] = None - component_params_schema: ClassVar = None + params_schema: ClassVar = None generate_params_schema: ClassVar = None @classmethod - def generate_files(cls, params: Any) -> Optional[Mapping[str, Any]]: ... + def generate_files(cls, request: ComponentGenerateRequest, params: Any) -> None: ... @abstractmethod - def build_defs(self, context: "ComponentLoadContext") -> "Definitions": ... + def build_defs(self, context: "ComponentLoadContext") -> Definitions: ... @classmethod @abstractmethod - def from_decl_node( - cls, context: "ComponentLoadContext", decl_node: "ComponentDeclNode" - ) -> Self: ... + def load(cls, context: "ComponentLoadContext") -> Self: ... + + @classmethod + def get_metadata(cls) -> "ComponentInternalMetadata": + docstring = cls.__doc__ + clean_docstring = _clean_docstring(docstring) if docstring else None + + return { + "summary": clean_docstring.split("\n\n")[0] if clean_docstring else None, + "description": clean_docstring if clean_docstring else None, + "generate_params_schema": cls.generate_params_schema.schema() + if cls.generate_params_schema + else None, + "component_params_schema": cls.params_schema.schema() if cls.params_schema else None, + } + + @classmethod + def get_description(cls) -> Optional[str]: + return inspect.getdoc(cls) + + +def _clean_docstring(docstring: str) -> str: + lines = docstring.strip().splitlines() + first_line = lines[0] + if len(lines) == 1: + return first_line + else: + rest = textwrap.dedent("\n".join(lines[1:])) + return f"{first_line}\n{rest}" + + +class ComponentInternalMetadata(TypedDict): + summary: Optional[str] + description: Optional[str] + generate_params_schema: Optional[Any] # json schema + component_params_schema: Optional[Any] # json schema + + +class ComponentMetadata(ComponentInternalMetadata): + name: str + package: str def get_entry_points_from_python_environment(group: str) -> Sequence[importlib.metadata.EntryPoint]: @@ -44,13 +107,43 @@ def get_entry_points_from_python_environment(group: str) -> Sequence[importlib.m COMPONENTS_ENTRY_POINT_GROUP = "dagster.components" +BUILTIN_COMPONENTS_ENTRY_POINT_BASE = "dagster_components" +BUILTIN_PUBLISHED_COMPONENT_ENTRY_POINT = BUILTIN_COMPONENTS_ENTRY_POINT_BASE +BUILTIN_TEST_COMPONENT_ENTRY_POINT = ".".join([BUILTIN_COMPONENTS_ENTRY_POINT_BASE, "test"]) class ComponentRegistry: @classmethod - def from_entry_point_discovery(cls) -> "ComponentRegistry": + def from_entry_point_discovery( + cls, builtin_component_lib: str = BUILTIN_PUBLISHED_COMPONENT_ENTRY_POINT + ) -> "ComponentRegistry": + """Discover components registered in the Python environment via the `dagster_components` entry point group. + + `dagster-components` itself registers multiple component entry points. We call these + "builtin" component libraries. The `dagster_components` entry point resolves to published + components and is loaded by default. Other entry points resolve to various sets of test + components. This method will only ever load one builtin component library. + + Args: + builtin-component-lib (str): Specifies the builtin components library to load. Builtin + copmonents libraries are defined under entry points with names matching the pattern + `dagster_components*`. Only one builtin component library can be loaded at a time. 
+ Defaults to `dagster_components`, the standard set of published components. + """ components: Dict[str, Type[Component]] = {} for entry_point in get_entry_points_from_python_environment(COMPONENTS_ENTRY_POINT_GROUP): + # Skip builtin entry points that are not the specified builtin component library. + if ( + entry_point.name.startswith(BUILTIN_COMPONENTS_ENTRY_POINT_BASE) + and not entry_point.name == builtin_component_lib + ): + continue + elif entry_point.name == BUILTIN_TEST_COMPONENT_ENTRY_POINT: + if builtin_component_lib: + ensure_dagster_components_tests_import() + else: + continue + root_module = entry_point.load() if not isinstance(root_module, ModuleType): raise DagsterError( @@ -88,33 +181,64 @@ def __repr__(self) -> str: return f"" -def get_registered_components_in_module(root_module: ModuleType) -> Iterable[Type[Component]]: - from dagster._core.definitions.load_assets_from_modules import ( - find_modules_in_package, - find_subclasses_in_module, - ) +def get_registered_components_in_module(module: ModuleType) -> Iterable[Type[Component]]: + from dagster._core.definitions.load_assets_from_modules import find_subclasses_in_module + + for component in find_subclasses_in_module(module, (Component,)): + if is_registered_component(component): + yield component - for module in find_modules_in_package(root_module): - for component in find_subclasses_in_module(module, (Component,)): - if is_registered_component(component): - yield component +T = TypeVar("T") + +@dataclass class ComponentLoadContext: - def __init__(self, *, resources: Mapping[str, object], registry: ComponentRegistry): - self.registry = registry - self.resources = resources + resources: Mapping[str, object] + registry: ComponentRegistry + decl_node: Optional[ComponentDeclNode] + templated_value_resolver: TemplatedValueResolver @staticmethod def for_test( *, resources: Optional[Mapping[str, object]] = None, registry: Optional[ComponentRegistry] = None, + decl_node: Optional[ComponentDeclNode] = None, ) -> "ComponentLoadContext": return ComponentLoadContext( - resources=resources or {}, registry=registry or ComponentRegistry.empty() + resources=resources or {}, + registry=registry or ComponentRegistry.empty(), + decl_node=decl_node, + templated_value_resolver=TemplatedValueResolver.default(), ) + @property + def path(self) -> Path: + from dagster_components.core.component_decl_builder import YamlComponentDecl + + if not isinstance(self.decl_node, YamlComponentDecl): + check.failed(f"Unsupported decl_node type {type(self.decl_node)}") + + return self.decl_node.path + + def for_decl_node(self, decl_node: ComponentDeclNode) -> "ComponentLoadContext": + return dataclasses.replace(self, decl_node=decl_node) + + def _raw_params(self) -> Optional[Mapping[str, Any]]: + from dagster_components.core.component_decl_builder import YamlComponentDecl + + if not isinstance(self.decl_node, YamlComponentDecl): + check.failed(f"Unsupported decl_node type {type(self.decl_node)}") + return self.decl_node.component_file_model.params + + def load_params(self, params_schema: Type[T]) -> T: + with pushd(str(self.path)): + preprocessed_params = preprocess_value( + self.templated_value_resolver, self._raw_params(), params_schema + ) + return TypeAdapter(params_schema).validate_python(preprocessed_params) + COMPONENT_REGISTRY_KEY_ATTR = "__dagster_component_registry_key" diff --git a/python_modules/libraries/dagster-components/dagster_components/core/component_defs_builder.py 
b/python_modules/libraries/dagster-components/dagster_components/core/component_defs_builder.py index 3d3e7ab4d7851..04a1ce93ed03a 100644 --- a/python_modules/libraries/dagster-components/dagster_components/core/component_defs_builder.py +++ b/python_modules/libraries/dagster-components/dagster_components/core/component_defs_builder.py @@ -9,9 +9,9 @@ from dagster_components.core.component import ( Component, - ComponentDeclNode, ComponentLoadContext, ComponentRegistry, + TemplatedValueResolver, get_component_name, is_registered_component, ) @@ -39,24 +39,22 @@ def load_module_from_path(module_name, path) -> ModuleType: return module -def build_components_from_decl_node( - context: ComponentLoadContext, decl_node: ComponentDeclNode -) -> Sequence[Component]: - if isinstance(decl_node, YamlComponentDecl): - component_type = component_type_from_yaml_decl(context, decl_node) - return [component_type.from_decl_node(context, decl_node)] - elif isinstance(decl_node, ComponentFolder): +def load_components_from_context(context: ComponentLoadContext) -> Sequence[Component]: + if isinstance(context.decl_node, YamlComponentDecl): + component_type = component_type_from_yaml_decl(context.registry, context.decl_node) + return [component_type.load(context)] + elif isinstance(context.decl_node, ComponentFolder): components = [] - for sub_decl in decl_node.sub_decls: - components.extend(build_components_from_decl_node(context, sub_decl)) + for sub_decl in context.decl_node.sub_decls: + components.extend(load_components_from_context(context.for_decl_node(sub_decl))) return components - raise NotImplementedError(f"Unknown component type {decl_node}") + raise NotImplementedError(f"Unknown component type {context.decl_node}") def component_type_from_yaml_decl( - context: ComponentLoadContext, decl_node: YamlComponentDecl -) -> Type: + registry: ComponentRegistry, decl_node: YamlComponentDecl +) -> Type[Component]: parsed_defs = decl_node.component_file_model if parsed_defs.type.startswith("."): component_registry_key = parsed_defs.type[1:] @@ -79,16 +77,15 @@ def component_type_from_yaml_decl( f"Could not find component type {component_registry_key} in {decl_node.path}" ) - return context.registry.get(parsed_defs.type) + return registry.get(parsed_defs.type) def build_components_from_component_folder( - context: ComponentLoadContext, - path: Path, + context: ComponentLoadContext, path: Path ) -> Sequence[Component]: component_folder = path_to_decl_node(path) assert isinstance(component_folder, ComponentFolder) - return build_components_from_decl_node(context, component_folder) + return load_components_from_context(context.for_decl_node(component_folder)) def build_defs_from_component_path( @@ -97,12 +94,17 @@ def build_defs_from_component_path( resources: Mapping[str, object], ) -> "Definitions": """Build a definitions object from a folder within the components hierarchy.""" - context = ComponentLoadContext(resources=resources, registry=registry) - decl_node = path_to_decl_node(path=path) if not decl_node: raise Exception(f"No component found at path {path}") - components = build_components_from_decl_node(context, decl_node) + + context = ComponentLoadContext( + resources=resources, + registry=registry, + decl_node=decl_node, + templated_value_resolver=TemplatedValueResolver.default(), + ) + components = load_components_from_context(context) return defs_from_components(resources=resources, context=context, components=components) diff --git 
a/python_modules/libraries/dagster-components/dagster_components/core/component_rendering.py b/python_modules/libraries/dagster-components/dagster_components/core/component_rendering.py new file mode 100644 index 0000000000000..02b47e71be6d2 --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components/core/component_rendering.py @@ -0,0 +1,118 @@ +import json +import os +from typing import AbstractSet, Any, Mapping, Optional, Sequence, Type, TypeVar, Union + +import dagster._check as check +from dagster._record import record +from jinja2 import Template +from pydantic import BaseModel, Field +from pydantic.fields import FieldInfo + +T = TypeVar("T") + +REF_BASE = "#/$defs/" +REF_TEMPLATE = f"{REF_BASE}{{model}}" + +CONTEXT_KEY = "required_rendering_scope" + + +def RenderingScope(field: Optional[FieldInfo] = None, *, required_scope: AbstractSet[str]) -> Any: + """Defines a Pydantic Field that requires a specific scope to be available before rendering. + + Examples: + ```python + class Schema(BaseModel): + a: str = RenderingScope(required_scope={"foo", "bar"}) + b: Optional[int] = RenderingScope(Field(default=None), required_scope={"baz"}) + ``` + """ + return FieldInfo.merge_field_infos( + field or Field(), Field(json_schema_extra={CONTEXT_KEY: json.dumps(list(required_scope))}) + ) + + +def get_required_rendering_context(subschema: Mapping[str, Any]) -> Optional[AbstractSet[str]]: + raw = check.opt_inst(subschema.get(CONTEXT_KEY), str) + return set(json.loads(raw)) if raw else None + + +def _env(key: str) -> Optional[str]: + return os.environ.get(key) + + +@record +class TemplatedValueResolver: + context: Mapping[str, Any] + + @staticmethod + def default() -> "TemplatedValueResolver": + return TemplatedValueResolver(context={"env": _env}) + + def with_context(self, **additional_context) -> "TemplatedValueResolver": + return TemplatedValueResolver(context={**self.context, **additional_context}) + + def resolve(self, val: str) -> str: + return Template(val).render(**self.context) + + +def _should_render( + valpath: Sequence[Union[str, int]], json_schema: Mapping[str, Any], subschema: Mapping[str, Any] +) -> bool: + # List[ComplexType] (e.g.) will contain a reference to the complex type schema in the + # top-level $defs, so we dereference it here. + if "$ref" in subschema: + subschema = json_schema["$defs"].get(subschema["$ref"][len(REF_BASE) :]) + + if get_required_rendering_context(subschema) is not None: + return False + elif len(valpath) == 0: + return True + + # Optional[ComplexType] (e.g.) 
will contain multiple schemas in the "anyOf" field + if "anyOf" in subschema: + return any(_should_render(valpath, json_schema, inner) for inner in subschema["anyOf"]) + + el = valpath[0] + if isinstance(el, str): + # valpath: ['field'] + # field: X + inner = subschema.get("properties", {}).get(el) + elif isinstance(el, int): + # valpath: ['field', 0] + # field: List[X] + inner = subschema.get("items") + else: + check.failed(f"Unexpected valpath element: {el}") + + # the path wasn't valid + if not inner: + return False + + _, *rest = valpath + return _should_render(rest, json_schema, inner) + + +def _render_values( + value_resolver: TemplatedValueResolver, + val: Any, + valpath: Sequence[Union[str, int]], + json_schema: Optional[Mapping[str, Any]], +) -> Any: + if json_schema and not _should_render(valpath, json_schema, json_schema): + return val + elif isinstance(val, dict): + return { + k: _render_values(value_resolver, v, [*valpath, k], json_schema) for k, v in val.items() + } + elif isinstance(val, list): + return [ + _render_values(value_resolver, v, [*valpath, i], json_schema) for i, v in enumerate(val) + ] + else: + return value_resolver.resolve(val) + + +def preprocess_value(renderer: TemplatedValueResolver, val: T, target_type: Type) -> T: + """Given a raw value, preprocesses it by rendering any templated values that are not marked as deferred in the target_type's json schema.""" + json_schema = target_type.model_json_schema() if issubclass(target_type, BaseModel) else None + return _render_values(renderer, val, [], json_schema) diff --git a/python_modules/libraries/dagster-components/dagster_components/core/deployment.py b/python_modules/libraries/dagster-components/dagster_components/core/deployment.py index ca52531dc4e77..de95b5875a0ad 100644 --- a/python_modules/libraries/dagster-components/dagster_components/core/deployment.py +++ b/python_modules/libraries/dagster-components/dagster_components/core/deployment.py @@ -1,6 +1,6 @@ import os from pathlib import Path -from typing import Final, Iterable, Type +from typing import Final, Iterable, Tuple, Type import tomli from dagster._core.errors import DagsterError @@ -78,8 +78,9 @@ def get_component_type(self, name: str) -> Type[Component]: raise DagsterError(f"No component type named {name}") return self._component_registry.get(name) - def list_component_types(self) -> Iterable[str]: - return sorted(self._component_registry.keys()) + def list_component_types(self) -> Iterable[Tuple[str, Type[Component]]]: + for key in sorted(self._component_registry.keys()): + yield key, self._component_registry.get(key) def get_component_instance_path(self, name: str) -> str: if name not in self.component_instances: diff --git a/python_modules/libraries/dagster-components/dagster_components/core/dsl_schema.py b/python_modules/libraries/dagster-components/dagster_components/core/dsl_schema.py index bf63f7aba6e76..39af228db176f 100644 --- a/python_modules/libraries/dagster-components/dagster_components/core/dsl_schema.py +++ b/python_modules/libraries/dagster-components/dagster_components/core/dsl_schema.py @@ -1,8 +1,90 @@ -from typing import Dict, Optional +from abc import ABC, abstractmethod +from typing import Annotated, Any, Dict, Literal, Mapping, Optional, Sequence, Union -from pydantic import BaseModel +from dagster._core.definitions.asset_selection import AssetSelection +from dagster._core.definitions.asset_spec import AssetSpec, map_asset_specs +from dagster._core.definitions.assets import AssetsDefinition +from 
dagster._core.definitions.declarative_automation.automation_condition import ( + AutomationCondition, +) +from dagster._core.definitions.definitions_class import Definitions +from dagster._record import replace +from pydantic import BaseModel, Field class OpSpecBaseModel(BaseModel): name: Optional[str] = None tags: Optional[Dict[str, str]] = None + + +class AutomationConditionModel(BaseModel): + type: str + params: Mapping[str, Any] = {} + + def to_automation_condition(self) -> AutomationCondition: + return getattr(AutomationCondition, self.type)(**self.params) + + +class AssetSpecProcessor(ABC, BaseModel): + target: str = "*" + description: Optional[str] = None + metadata: Optional[Mapping[str, Any]] = None + group_name: Optional[str] = None + tags: Optional[Mapping[str, str]] = None + automation_condition: Optional[AutomationConditionModel] = None + + def _attributes(self) -> Mapping[str, Any]: + return { + **self.model_dump(exclude={"target", "operation"}, exclude_unset=True), + **{ + "automation_condition": self.automation_condition.to_automation_condition() + if self.automation_condition + else None + }, + } + + @abstractmethod + def _apply_to_spec(self, spec: AssetSpec) -> AssetSpec: ... + + def apply(self, defs: Definitions) -> Definitions: + target_selection = AssetSelection.from_string(self.target, include_sources=True) + target_keys = target_selection.resolve(defs.get_asset_graph()) + + mappable = [d for d in defs.assets or [] if isinstance(d, (AssetsDefinition, AssetSpec))] + mapped_assets = map_asset_specs( + lambda spec: self._apply_to_spec(spec) if spec.key in target_keys else spec, mappable + ) + + assets = [ + *mapped_assets, + *[d for d in defs.assets or [] if not isinstance(d, (AssetsDefinition, AssetSpec))], + ] + return replace(defs, assets=assets) + + +class MergeAttributes(AssetSpecProcessor): + # default operation is "merge" + operation: Literal["merge"] = "merge" + + def _apply_to_spec(self, spec: AssetSpec) -> AssetSpec: + attributes = self._attributes() + mergeable_attributes = {"metadata", "tags"} + merge_attributes = {k: v for k, v in attributes.items() if k in mergeable_attributes} + replace_attributes = {k: v for k, v in attributes.items() if k not in mergeable_attributes} + return spec.merge_attributes(**merge_attributes).replace_attributes(**replace_attributes) + + +class ReplaceAttributes(AssetSpecProcessor): + # operation must be set explicitly + operation: Literal["replace"] + + def _apply_to_spec(self, spec: AssetSpec) -> AssetSpec: + return spec.replace_attributes(**self._attributes()) + + +AssetAttributes = Sequence[ + Annotated[ + Union[MergeAttributes, ReplaceAttributes], + Field(union_mode="left_to_right"), + ] +] diff --git a/python_modules/libraries/dagster-components/dagster_components/generate.py b/python_modules/libraries/dagster-components/dagster_components/generate.py index dcda0cd58967b..5f6e9a369bf83 100644 --- a/python_modules/libraries/dagster-components/dagster_components/generate.py +++ b/python_modules/libraries/dagster-components/dagster_components/generate.py @@ -1,13 +1,12 @@ import os from pathlib import Path -from typing import Any, Type +from typing import Any, Mapping, Optional, Type import click import yaml -from dagster._generate.generate import generate_project -from dagster._utils import pushd +from dagster._utils import mkdir_p -from dagster_components.core.component import Component +from dagster_components.core.component import Component, ComponentGenerateRequest class ComponentDumper(yaml.Dumper): @@ -18,6 +17,16 @@ def 
write_line_break(self) -> None: super().write_line_break() +def generate_component_yaml( + request: ComponentGenerateRequest, component_params: Optional[Mapping[str, Any]] +) -> None: + with open(request.component_instance_root_path / "component.yaml", "w") as f: + component_data = {"type": request.component_type_name, "params": component_params or {}} + yaml.dump( + component_data, f, Dumper=ComponentDumper, sort_keys=False, default_flow_style=False + ) + + def generate_component_instance( root_path: str, name: str, @@ -25,22 +34,20 @@ def generate_component_instance( component_type_name: str, generate_params: Any, ) -> None: - click.echo(f"Creating a Dagster component instance at {root_path}/{name}.py.") - - component_instance_root_path = os.path.join(root_path, name) - generate_project( - path=component_instance_root_path, - name_placeholder="COMPONENT_INSTANCE_NAME_PLACEHOLDER", - templates_path=os.path.join( - os.path.dirname(__file__), "templates", "COMPONENT_INSTANCE_NAME_PLACEHOLDER" + component_instance_root_path = Path(os.path.join(root_path, name)) + click.echo(f"Creating a Dagster component instance folder at {component_instance_root_path}.") + mkdir_p(str(component_instance_root_path)) + component_type.generate_files( + ComponentGenerateRequest( + component_type_name=component_type_name, + component_instance_root_path=component_instance_root_path, ), - project_name=name, - component_type=component_type_name, + generate_params, ) - with pushd(component_instance_root_path): - component_params = component_type.generate_files(generate_params) - component_data = {"type": component_type_name, "params": component_params or {}} - with open(Path(component_instance_root_path) / "component.yaml", "w") as f: - yaml.dump( - component_data, f, Dumper=ComponentDumper, sort_keys=False, default_flow_style=False + + component_yaml_path = component_instance_root_path / "component.yaml" + + if not component_yaml_path.exists(): + raise Exception( + f"Currently all components require a component.yaml file. Please ensure your implementation of generate_files writes this file at {component_yaml_path}." 
) diff --git a/python_modules/libraries/dagster-components/dagster_components/lib/__init__.py b/python_modules/libraries/dagster-components/dagster_components/lib/__init__.py index e69de29bb2d1d..bc6a6f9f3e6d9 100644 --- a/python_modules/libraries/dagster-components/dagster_components/lib/__init__.py +++ b/python_modules/libraries/dagster-components/dagster_components/lib/__init__.py @@ -0,0 +1,16 @@ +import importlib.util + +_has_dagster_dbt = importlib.util.find_spec("dagster_dbt") is not None +_has_dagster_embedded_elt = importlib.util.find_spec("dagster_embedded_elt") is not None + +if _has_dagster_dbt: + from dagster_components.lib.dbt_project import DbtProjectComponent as DbtProjectComponent + +if _has_dagster_embedded_elt: + from dagster_components.lib.sling_replication import ( + SlingReplicationComponent as SlingReplicationComponent, + ) + +from dagster_components.lib.pipes_subprocess_script_collection import ( + PipesSubprocessScriptCollection as PipesSubprocessScriptCollection, +) diff --git a/python_modules/libraries/dagster-components/dagster_components/lib/dbt_project.py b/python_modules/libraries/dagster-components/dagster_components/lib/dbt_project.py index 6f11a5a21273a..02f8ca82273c4 100644 --- a/python_modules/libraries/dagster-components/dagster_components/lib/dbt_project.py +++ b/python_modules/libraries/dagster-components/dagster_components/lib/dbt_project.py @@ -1,23 +1,27 @@ import os from pathlib import Path -from typing import Any, Iterator, Mapping, Optional +from typing import Any, Iterator, Mapping, Optional, Sequence import click import dagster._check as check from dagster._core.definitions.asset_key import AssetKey from dagster._core.definitions.definitions_class import Definitions +from dagster._core.execution.context.asset_execution_context import AssetExecutionContext from dagster._utils import pushd from dagster_dbt import DagsterDbtTranslator, DbtCliResource, DbtProject, dbt_assets -from dagster_embedded_elt.sling.resources import AssetExecutionContext from dbt.cli.main import dbtRunner -from jinja2 import Template -from pydantic import BaseModel, Field, TypeAdapter +from pydantic import BaseModel, Field from typing_extensions import Self from dagster_components import Component, ComponentLoadContext -from dagster_components.core.component import component -from dagster_components.core.component_decl_builder import ComponentDeclNode, YamlComponentDecl -from dagster_components.core.dsl_schema import OpSpecBaseModel +from dagster_components.core.component import ( + ComponentGenerateRequest, + TemplatedValueResolver, + component, +) +from dagster_components.core.component_rendering import RenderingScope +from dagster_components.core.dsl_schema import AssetAttributes, AssetSpecProcessor, OpSpecBaseModel +from dagster_components.generate import generate_component_yaml class DbtNodeTranslatorParams(BaseModel): @@ -28,7 +32,10 @@ class DbtNodeTranslatorParams(BaseModel): class DbtProjectParams(BaseModel): dbt: DbtCliResource op: Optional[OpSpecBaseModel] = None - translator: Optional[DbtNodeTranslatorParams] = None + translator: Optional[DbtNodeTranslatorParams] = RenderingScope( + Field(default=None), required_scope={"node"} + ) + asset_attributes: Optional[AssetAttributes] = None class DbtGenerateParams(BaseModel): @@ -47,8 +54,10 @@ class DbtProjectComponentTranslator(DagsterDbtTranslator): def __init__( self, *, + value_resolver: TemplatedValueResolver, translator_params: Optional[DbtNodeTranslatorParams] = None, ): + self.value_resolver = value_resolver 
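For context on the translator wiring above: `TemplatedValueResolver` (added in `component_rendering.py` earlier in this diff) renders Jinja-templated params against an extensible scope, which is how `{{ node.* }}` expressions in the dbt translator params get resolved. A small self-contained sketch; the template string and node dict are illustrative, not taken from this change.

```python
from dagster_components.core.component_rendering import TemplatedValueResolver

# Default scope plus an extra "node" binding, mirroring how get_asset_key /
# get_group_name resolve templated translator params per dbt resource.
resolver = TemplatedValueResolver.default().with_context(node={"name": "orders"})

assert resolver.resolve("analytics/{{ node.name }}") == "analytics/orders"
```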
self.translator_params = translator_params def get_asset_key(self, dbt_resource_props: Mapping[str, Any]) -> AssetKey: @@ -56,14 +65,18 @@ def get_asset_key(self, dbt_resource_props: Mapping[str, Any]) -> AssetKey: return super().get_asset_key(dbt_resource_props) return AssetKey.from_user_string( - Template(self.translator_params.key).render(node=dbt_resource_props) + self.value_resolver.with_context(node=dbt_resource_props).resolve( + self.translator_params.key + ) ) def get_group_name(self, dbt_resource_props) -> Optional[str]: if not self.translator_params or not self.translator_params.group: return super().get_group_name(dbt_resource_props) - return Template(self.translator_params.group).render(node=dbt_resource_props) + return self.value_resolver.with_context(node=dbt_resource_props).resolve( + self.translator_params.group + ) @component(name="dbt_project") @@ -76,26 +89,27 @@ def __init__( dbt_resource: DbtCliResource, op_spec: Optional[OpSpecBaseModel], dbt_translator: Optional[DagsterDbtTranslator], + asset_processors: Sequence[AssetSpecProcessor], ): self.dbt_resource = dbt_resource self.op_spec = op_spec self.dbt_translator = dbt_translator + self.asset_processors = asset_processors @classmethod - def from_decl_node(cls, context: ComponentLoadContext, decl_node: ComponentDeclNode) -> Self: - assert isinstance(decl_node, YamlComponentDecl) - + def load(cls, context: ComponentLoadContext) -> Self: # all paths should be resolved relative to the directory we're in - with pushd(str(decl_node.path)): - loaded_params = TypeAdapter(cls.params_schema).validate_python( - decl_node.component_file_model.params - ) + with pushd(str(context.path)): + loaded_params = context.load_params(cls.params_schema) + return cls( dbt_resource=loaded_params.dbt, op_spec=loaded_params.op, dbt_translator=DbtProjectComponentTranslator( - translator_params=loaded_params.translator + translator_params=loaded_params.translator, + value_resolver=context.templated_value_resolver, ), + asset_processors=loaded_params.asset_attributes or [], ) def build_defs(self, context: ComponentLoadContext) -> Definitions: @@ -112,10 +126,13 @@ def build_defs(self, context: ComponentLoadContext) -> Definitions: def _fn(context: AssetExecutionContext): yield from self.execute(context=context, dbt=self.dbt_resource) - return Definitions(assets=[_fn]) + defs = Definitions(assets=[_fn]) + for transform in self.asset_processors: + defs = transform.apply(defs) + return defs @classmethod - def generate_files(cls, params: DbtGenerateParams) -> Mapping[str, Any]: + def generate_files(cls, request: ComponentGenerateRequest, params: DbtGenerateParams) -> None: cwd = os.getcwd() if params.project_path: # NOTE: CWD is not set "correctly" above so we prepend "../../.." 
as a temporary hack to @@ -134,7 +151,7 @@ def generate_files(cls, params: DbtGenerateParams) -> Mapping[str, Any]: else: relative_path = None - return {"dbt": {"project_dir": relative_path}} + generate_component_yaml(request, {"dbt": {"project_dir": relative_path}}) def execute(self, context: AssetExecutionContext, dbt: DbtCliResource) -> Iterator: yield from dbt.cli(["build"], context=context).stream() diff --git a/python_modules/libraries/dagster-components/dagster_components/lib/pipes_subprocess_script_collection.py b/python_modules/libraries/dagster-components/dagster_components/lib/pipes_subprocess_script_collection.py index cc461b6b2b234..75a8a83730505 100644 --- a/python_modules/libraries/dagster-components/dagster_components/lib/pipes_subprocess_script_collection.py +++ b/python_modules/libraries/dagster-components/dagster_components/lib/pipes_subprocess_script_collection.py @@ -9,15 +9,10 @@ from dagster._core.execution.context.asset_execution_context import AssetExecutionContext from dagster._core.pipes.subprocess import PipesSubprocessClient from dagster._utils.warnings import suppress_dagster_warnings -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel -from dagster_components.core.component import ( - Component, - ComponentDeclNode, - ComponentLoadContext, - component, -) -from dagster_components.core.component_decl_builder import YamlComponentDecl +from dagster_components.core.component import Component, ComponentLoadContext, component +from dagster_components.core.dsl_schema import AutomationConditionModel if TYPE_CHECKING: from dagster._core.definitions.definitions_class import Definitions @@ -33,6 +28,7 @@ class AssetSpecModel(BaseModel): code_version: Optional[str] = None owners: Sequence[str] = [] tags: Mapping[str, str] = {} + automation_condition: Optional[AutomationConditionModel] = None @suppress_dagster_warnings def to_asset_spec(self) -> AssetSpec: @@ -40,6 +36,9 @@ def to_asset_spec(self) -> AssetSpec: **{ **self.__dict__, "key": AssetKey.from_user_string(self.key), + "automation_condition": self.automation_condition.to_automation_condition() + if self.automation_condition + else None, }, ) @@ -55,6 +54,8 @@ class PipesSubprocessScriptCollectionParams(BaseModel): @component(name="pipes_subprocess_script_collection") class PipesSubprocessScriptCollection(Component): + """Assets that wrap Python scripts executed with Dagster's PipesSubprocessClient.""" + params_schema = PipesSubprocessScriptCollectionParams def __init__(self, dirpath: Path, path_specs: Mapping[Path, Sequence[AssetSpec]]): @@ -69,22 +70,17 @@ def introspect_from_path(path: Path) -> "PipesSubprocessScriptCollection": return PipesSubprocessScriptCollection(dirpath=path, path_specs=path_specs) @classmethod - def from_decl_node( - cls, load_context: ComponentLoadContext, component_decl: ComponentDeclNode - ) -> "PipesSubprocessScriptCollection": - assert isinstance(component_decl, YamlComponentDecl) - loaded_params = TypeAdapter(cls.params_schema).validate_python( - component_decl.component_file_model.params - ) + def load(cls, context: ComponentLoadContext) -> "PipesSubprocessScriptCollection": + loaded_params = context.load_params(cls.params_schema) path_specs = {} for script in loaded_params.scripts: - script_path = component_decl.path / script.path + script_path = context.path / script.path if not script_path.exists(): raise FileNotFoundError(f"Script {script_path} does not exist") path_specs[script_path] = [spec.to_asset_spec() for spec in script.assets] - return 
cls(dirpath=component_decl.path, path_specs=path_specs) + return cls(dirpath=context.path, path_specs=path_specs) def build_defs(self, load_context: "ComponentLoadContext") -> "Definitions": from dagster._core.definitions.definitions_class import Definitions diff --git a/python_modules/libraries/dagster-components/dagster_components/lib/sling_replication.py b/python_modules/libraries/dagster-components/dagster_components/lib/sling_replication.py index 772cc42d0e1ec..63c466930deb9 100644 --- a/python_modules/libraries/dagster-components/dagster_components/lib/sling_replication.py +++ b/python_modules/libraries/dagster-components/dagster_components/lib/sling_replication.py @@ -1,6 +1,6 @@ import os from pathlib import Path -from typing import Any, Iterator, Optional, Union +from typing import Any, Iterator, Optional, Sequence, Union import yaml from dagster._core.definitions.definitions_class import Definitions @@ -8,39 +8,45 @@ from dagster._core.definitions.result import MaterializeResult from dagster_embedded_elt.sling import SlingResource, sling_assets from dagster_embedded_elt.sling.resources import AssetExecutionContext -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel from typing_extensions import Self from dagster_components import Component, ComponentLoadContext -from dagster_components.core.component import component -from dagster_components.core.component_decl_builder import ComponentDeclNode, YamlComponentDecl -from dagster_components.core.dsl_schema import OpSpecBaseModel +from dagster_components.core.component import ComponentGenerateRequest, component +from dagster_components.core.dsl_schema import AssetAttributes, AssetSpecProcessor, OpSpecBaseModel +from dagster_components.generate import generate_component_yaml class SlingReplicationParams(BaseModel): sling: Optional[SlingResource] = None op: Optional[OpSpecBaseModel] = None + asset_attributes: Optional[AssetAttributes] = None @component(name="sling_replication") class SlingReplicationComponent(Component): params_schema = SlingReplicationParams - def __init__(self, dirpath: Path, resource: SlingResource, op_spec: Optional[OpSpecBaseModel]): + def __init__( + self, + dirpath: Path, + resource: SlingResource, + op_spec: Optional[OpSpecBaseModel], + asset_processors: Sequence[AssetSpecProcessor], + ): self.dirpath = dirpath self.resource = resource self.op_spec = op_spec + self.asset_processors = asset_processors @classmethod - def from_decl_node(cls, context: ComponentLoadContext, decl_node: ComponentDeclNode) -> Self: - assert isinstance(decl_node, YamlComponentDecl) - loaded_params = TypeAdapter(cls.params_schema).validate_python( - decl_node.component_file_model.params - ) + def load(cls, context: ComponentLoadContext) -> Self: + loaded_params = context.load_params(cls.params_schema) return cls( - dirpath=decl_node.path, + dirpath=context.path, resource=loaded_params.sling or SlingResource(), op_spec=loaded_params.op, + asset_processors=loaded_params.asset_attributes or [], ) def build_defs(self, context: ComponentLoadContext) -> Definitions: @@ -52,10 +58,14 @@ def build_defs(self, context: ComponentLoadContext) -> Definitions: def _fn(context: AssetExecutionContext, sling: SlingResource): yield from self.execute(context=context, sling=sling) - return Definitions(assets=[_fn], resources={"sling": self.resource}) + defs = Definitions(assets=[_fn], resources={"sling": self.resource}) + for transform in self.asset_processors: + defs = transform.apply(defs) + return defs @classmethod - def 
generate_files(cls, params: Any) -> None: + def generate_files(cls, request: ComponentGenerateRequest, params: Any) -> None: + generate_component_yaml(request, params) replication_path = Path(os.getcwd()) / "replication.yaml" with open(replication_path, "w") as f: yaml.dump( diff --git a/python_modules/libraries/dagster-components/dagster_components/utils.py b/python_modules/libraries/dagster-components/dagster_components/utils.py new file mode 100644 index 0000000000000..5d4f19c11f694 --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components/utils.py @@ -0,0 +1,14 @@ +import sys +from pathlib import Path + +CLI_BUILTIN_COMPONENT_LIB_KEY = "builtin_component_lib" + + +def ensure_dagster_components_tests_import() -> None: + from dagster_components import __file__ as dagster_components_init_py + + dagster_components_package_root = (Path(dagster_components_init_py) / ".." / "..").resolve() + assert ( + dagster_components_package_root / "dagster_components_tests" + ).exists(), "Could not find dagster_components_tests where expected" + sys.path.append(dagster_components_package_root.as_posix()) diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/cli_tests/test_commands.py b/python_modules/libraries/dagster-components/dagster_components_tests/cli_tests/test_commands.py new file mode 100644 index 0000000000000..1f4fad6c8c99c --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components_tests/cli_tests/test_commands.py @@ -0,0 +1,103 @@ +import json +from pathlib import Path + +from click.testing import CliRunner +from dagster_components.cli import cli +from dagster_components.utils import ensure_dagster_components_tests_import + +ensure_dagster_components_tests_import() + +from dagster_components_tests.utils import temp_code_location_bar + + +# Test that the global --use-test-component-lib flag changes the registered components +def test_global_test_flag(): + runner: CliRunner = CliRunner() + + # standard + result = runner.invoke(cli, ["list", "component-types"]) + assert result.exit_code == 0 + default_result_keys = list(json.loads(result.output).keys()) + assert len(default_result_keys) > 0 + + result = runner.invoke( + cli, ["--builtin-component-lib", "dagster_components.test", "list", "component-types"] + ) + assert result.exit_code == 0 + test_result_keys = list(json.loads(result.output).keys()) + assert len(default_result_keys) > 0 + + assert default_result_keys != test_result_keys + + +def test_list_component_types_command(): + runner = CliRunner() + + result = runner.invoke( + cli, ["--builtin-component-lib", "dagster_components.test", "list", "component-types"] + ) + assert result.exit_code == 0 + result = json.loads(result.output) + + assert list(result.keys()) == [ + "dagster_components.test.all_metadata_empty_asset", + "dagster_components.test.simple_asset", + "dagster_components.test.simple_pipes_script_asset", + ] + + assert result["dagster_components.test.simple_asset"] == { + "name": "simple_asset", + "package": "dagster_components.test", + "summary": "A simple asset that returns a constant string value.", + "description": "A simple asset that returns a constant string value.", + "generate_params_schema": None, + "component_params_schema": { + "properties": { + "asset_key": {"title": "Asset Key", "type": "string"}, + "value": {"title": "Value", "type": "string"}, + }, + "required": ["asset_key", "value"], + "title": "SimpleAssetParams", + "type": "object", + }, + } + + pipes_script_params_schema = { + 
"properties": { + "asset_key": {"title": "Asset Key", "type": "string"}, + "filename": {"title": "Filename", "type": "string"}, + }, + "required": ["asset_key", "filename"], + "title": "SimplePipesScriptAssetParams", + "type": "object", + } + + assert result["dagster_components.test.simple_pipes_script_asset"] == { + "name": "simple_pipes_script_asset", + "package": "dagster_components.test", + "summary": "A simple asset that runs a Python script with the Pipes subprocess client.", + "description": "A simple asset that runs a Python script with the Pipes subprocess client.\n\nBecause it is a pipes asset, no value is returned.", + "generate_params_schema": pipes_script_params_schema, + "component_params_schema": pipes_script_params_schema, + } + + +def test_generate_component_command(): + runner = CliRunner() + + with temp_code_location_bar(): + result = runner.invoke( + cli, + [ + "--builtin-component-lib", + "dagster_components.test", + "generate", + "component", + "dagster_components.test.simple_pipes_script_asset", + "qux", + "--json-params", + '{"asset_key": "my_asset", "filename": "my_asset.py"}', + ], + ) + assert result.exit_code == 0 + assert Path("bar/components/qux/my_asset.py").exists() diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/code_locations/python_script_location/components/scripts/component.yaml b/python_modules/libraries/dagster-components/dagster_components_tests/code_locations/python_script_location/components/scripts/component.yaml index 34bd590951b8b..e65c0273bb93b 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/code_locations/python_script_location/components/scripts/component.yaml +++ b/python_modules/libraries/dagster-components/dagster_components_tests/code_locations/python_script_location/components/scripts/component.yaml @@ -5,7 +5,13 @@ params: - path: script_one.py assets: - key: a + automation_condition: + type: eager - key: b + automation_condition: + type: on_cron + params: + cron_schedule: "@daily" deps: [up1, up2] - path: script_two.py assets: diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_dbt_project.py b/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_dbt_project.py index d3bc69c881f32..781fa0ee16d6c 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_dbt_project.py +++ b/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_dbt_project.py @@ -44,19 +44,17 @@ def dbt_path() -> Generator[Path, None, None]: def test_python_params(dbt_path: Path) -> None: - component = DbtProjectComponent.from_decl_node( - context=script_load_context(), - decl_node=YamlComponentDecl( - path=dbt_path / COMPONENT_RELPATH, - component_file_model=ComponentFileModel( - type="dbt_project", - params={ - "dbt": {"project_dir": "jaffle_shop"}, - "op": {"name": "some_op", "tags": {"tag1": "value"}}, - }, - ), + decl_node = YamlComponentDecl( + path=dbt_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="dbt_project", + params={ + "dbt": {"project_dir": "jaffle_shop"}, + "op": {"name": "some_op", "tags": {"tag1": "value"}}, + }, ), ) + component = DbtProjectComponent.load(context=script_load_context(decl_node)) assert get_asset_keys(component) == JAFFLE_SHOP_KEYS defs = component.build_defs(script_load_context()) assert defs.get_assets_def("stg_customers").op.name == "some_op" @@ -79,3 +77,8 @@ def 
test_load_from_path(dbt_path: Path) -> None: ) assert defs.get_asset_graph().get_all_asset_keys() == JAFFLE_SHOP_KEYS + + for asset_node in defs.get_asset_graph().asset_nodes: + assert asset_node.tags["foo"] == "bar" + assert asset_node.tags["another"] == "one" + assert asset_node.metadata["something"] == 1 diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_sling_integration_test.py b/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_sling_integration_test.py index 494e4255aa853..d6fcf590b64ea 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_sling_integration_test.py +++ b/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_sling_integration_test.py @@ -66,17 +66,15 @@ def _update_defs(data: Dict[str, Any]) -> Mapping[str, Any]: def test_python_params(sling_path: Path) -> None: - context = script_load_context() - component = SlingReplicationComponent.from_decl_node( - context=context, - decl_node=YamlComponentDecl( - path=sling_path / COMPONENT_RELPATH, - component_file_model=ComponentFileModel( - type="sling_replication", - params={"sling": {}}, - ), + decl_node = YamlComponentDecl( + path=sling_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="sling_replication", + params={"sling": {}}, ), ) + context = script_load_context(decl_node) + component = SlingReplicationComponent.load(context) assert component.op_spec is None assert get_asset_keys(component) == { AssetKey("input_csv"), @@ -89,17 +87,15 @@ def test_python_params(sling_path: Path) -> None: def test_python_params_op_name(sling_path: Path) -> None: - context = script_load_context() - component = SlingReplicationComponent.from_decl_node( - context=context, - decl_node=YamlComponentDecl( - path=sling_path / COMPONENT_RELPATH, - component_file_model=ComponentFileModel( - type="sling_replication", - params={"sling": {}, "op": {"name": "my_op"}}, - ), + decl_node = YamlComponentDecl( + path=sling_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="sling_replication", + params={"sling": {}, "op": {"name": "my_op"}}, ), ) + context = script_load_context(decl_node) + component = SlingReplicationComponent.load(context=context) assert component.op_spec assert component.op_spec.name == "my_op" defs = component.build_defs(context) @@ -112,17 +108,15 @@ def test_python_params_op_name(sling_path: Path) -> None: def test_python_params_op_tags(sling_path: Path) -> None: - context = script_load_context() - component = SlingReplicationComponent.from_decl_node( - context=context, - decl_node=YamlComponentDecl( - path=sling_path / COMPONENT_RELPATH, - component_file_model=ComponentFileModel( - type="sling_replication", - params={"sling": {}, "op": {"tags": {"tag1": "value1"}}}, - ), + decl_node = YamlComponentDecl( + path=sling_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="sling_replication", + params={"sling": {}, "op": {"tags": {"tag1": "value1"}}}, ), ) + context = script_load_context(decl_node) + component = SlingReplicationComponent.load(context=context) assert component.op_spec assert component.op_spec.tags == {"tag1": "value1"} defs = component.build_defs(context) @@ -150,16 +144,16 @@ def execute( ) -> Iterator[Union[AssetMaterialization, MaterializeResult]]: return sling.replicate(context=context, debug=True) - component_inst = 
DebugSlingReplicationComponent.from_decl_node( - context=script_load_context(), - decl_node=YamlComponentDecl( - path=STUB_LOCATION_PATH / COMPONENT_RELPATH, - component_file_model=ComponentFileModel( - type="debug_sling_replication", - params={"sling": {}}, - ), + decl_node = YamlComponentDecl( + path=STUB_LOCATION_PATH / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="debug_sling_replication", + params={"sling": {}}, ), ) + component_inst = DebugSlingReplicationComponent.load( + context=script_load_context(decl_node), + ) assert get_asset_keys(component_inst) == { AssetKey("input_csv"), AssetKey("input_duckdb"), diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_templated_custom_keys_dbt_project.py b/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_templated_custom_keys_dbt_project.py index 5b21bed30e642..0c7d7554f8c88 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_templated_custom_keys_dbt_project.py +++ b/python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/test_templated_custom_keys_dbt_project.py @@ -6,6 +6,7 @@ import pytest from dagster import AssetKey +from dagster._utils.env import environ from dagster_components.core.component_decl_builder import ComponentFileModel from dagster_components.core.component_defs_builder import ( YamlComponentDecl, @@ -62,42 +63,40 @@ def dbt_path() -> Generator[Path, None, None]: def test_python_params_node_rename(dbt_path: Path) -> None: - component = DbtProjectComponent.from_decl_node( - context=script_load_context(), - decl_node=YamlComponentDecl( - path=dbt_path / COMPONENT_RELPATH, - component_file_model=ComponentFileModel( - type="dbt_project", - params={ - "dbt": {"project_dir": "jaffle_shop"}, - "translator": { - "key": "some_prefix/{{ node.name }}", - }, + decl_node = YamlComponentDecl( + path=dbt_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="dbt_project", + params={ + "dbt": {"project_dir": "jaffle_shop"}, + "translator": { + "key": "some_prefix/{{ node.name }}", }, - ), + }, ), ) + component = DbtProjectComponent.load( + context=script_load_context(decl_node), + ) assert get_asset_keys(component) == JAFFLE_SHOP_KEYS_WITH_PREFIX def test_python_params_group(dbt_path: Path) -> None: - comp = DbtProjectComponent.from_decl_node( - context=script_load_context(), - decl_node=YamlComponentDecl( - path=dbt_path / COMPONENT_RELPATH, - component_file_model=ComponentFileModel( - type="dbt_project", - params={ - "dbt": {"project_dir": "jaffle_shop"}, - "translator": { - "group": "some_group", - }, + decl_node = YamlComponentDecl( + path=dbt_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="dbt_project", + params={ + "dbt": {"project_dir": "jaffle_shop"}, + "translator": { + "group": "some_group", }, - ), + }, ), ) + comp = DbtProjectComponent.load(context=script_load_context(decl_node)) assert get_asset_keys(comp) == JAFFLE_SHOP_KEYS - defs: Definitions = comp.build_defs(script_load_context()) + defs: Definitions = comp.build_defs(script_load_context(None)) for key in get_asset_keys(comp): assert defs.get_assets_def(key).get_asset_spec(key).group_name == "some_group" @@ -118,3 +117,42 @@ def test_load_from_path(dbt_path: Path) -> None: ) assert defs.get_asset_graph().get_all_asset_keys() == JAFFLE_SHOP_KEYS_WITH_PREFIX + + +def test_render_vars_root(dbt_path: Path) -> None: + with 
environ({"GROUP_AS_ENV": "group_in_env"}): + decl_node = YamlComponentDecl( + path=dbt_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="dbt_project", + params={ + "dbt": {"project_dir": "jaffle_shop"}, + "translator": { + "group": "{{ env('GROUP_AS_ENV') }}", + }, + }, + ), + ) + comp = DbtProjectComponent.load(context=script_load_context(decl_node)) + assert get_asset_keys(comp) == JAFFLE_SHOP_KEYS + defs: Definitions = comp.build_defs(script_load_context()) + for key in get_asset_keys(comp): + assert defs.get_assets_def(key).get_asset_spec(key).group_name == "group_in_env" + + +def test_render_vars_asset_key(dbt_path: Path) -> None: + with environ({"ASSET_KEY_PREFIX": "some_prefix"}): + decl_node = YamlComponentDecl( + path=dbt_path / COMPONENT_RELPATH, + component_file_model=ComponentFileModel( + type="dbt_project", + params={ + "dbt": {"project_dir": "jaffle_shop"}, + "translator": { + "key": "{{ env('ASSET_KEY_PREFIX') }}/{{ node.name }}", + }, + }, + ), + ) + comp = DbtProjectComponent.load(context=script_load_context(decl_node)) + assert get_asset_keys(comp) == JAFFLE_SHOP_KEYS_WITH_PREFIX diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/lib/__init__.py b/python_modules/libraries/dagster-components/dagster_components_tests/lib/__init__.py new file mode 100644 index 0000000000000..f988bb8df7cae --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components_tests/lib/__init__.py @@ -0,0 +1,7 @@ +from dagster_components_tests.lib.all_metadata_empty_asset import ( + AllMetadataEmptyAsset as AllMetadataEmptyAsset, +) +from dagster_components_tests.lib.simple_asset import SimpleAsset as SimpleAsset +from dagster_components_tests.lib.simple_pipes_script_asset import ( + SimplePipesScriptAsset as SimplePipesScriptAsset, +) diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/lib/all_metadata_empty_asset.py b/python_modules/libraries/dagster-components/dagster_components_tests/lib/all_metadata_empty_asset.py new file mode 100644 index 0000000000000..339f5761d21b8 --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components_tests/lib/all_metadata_empty_asset.py @@ -0,0 +1,34 @@ +from typing import TYPE_CHECKING, Any + +from dagster._core.definitions.decorators.asset_decorator import asset +from dagster._core.definitions.definitions_class import Definitions +from dagster._core.execution.context.asset_execution_context import AssetExecutionContext +from dagster_components import Component, ComponentLoadContext, component +from dagster_components.core.component import ComponentGenerateRequest +from dagster_components.core.component_decl_builder import YamlComponentDecl +from dagster_components.generate import generate_component_yaml +from typing_extensions import Self + +if TYPE_CHECKING: + from dagster_components.core.component import ComponentDeclNode + + +@component(name="all_metadata_empty_asset") +class AllMetadataEmptyAsset(Component): + @classmethod + def from_decl_node( + cls, context: "ComponentLoadContext", decl_node: "ComponentDeclNode" + ) -> Self: + assert isinstance(decl_node, YamlComponentDecl) + return cls() + + @classmethod + def generate_files(cls, request: ComponentGenerateRequest, params: Any) -> None: + generate_component_yaml(request, params) + + def build_defs(self, context: ComponentLoadContext) -> Definitions: + @asset + def hardcoded_asset(context: AssetExecutionContext): + return 1 + + return Definitions(assets=[hardcoded_asset]) diff 
--git a/python_modules/libraries/dagster-components/dagster_components_tests/lib/simple_asset.py b/python_modules/libraries/dagster-components/dagster_components_tests/lib/simple_asset.py new file mode 100644 index 0000000000000..67f742064ee76 --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components_tests/lib/simple_asset.py @@ -0,0 +1,55 @@ +from typing import TYPE_CHECKING + +from dagster._core.definitions.asset_key import AssetKey +from dagster._core.definitions.decorators.asset_decorator import asset +from dagster._core.definitions.definitions_class import Definitions +from dagster._core.execution.context.asset_execution_context import AssetExecutionContext +from dagster_components import Component, ComponentLoadContext, component +from dagster_components.core.component import ComponentGenerateRequest +from dagster_components.core.component_decl_builder import YamlComponentDecl +from dagster_components.generate import generate_component_yaml +from pydantic import BaseModel, TypeAdapter +from typing_extensions import Self + +if TYPE_CHECKING: + from dagster_components.core.component import ComponentDeclNode + + +class SimpleAssetParams(BaseModel): + asset_key: str + value: str + + +@component(name="simple_asset") +class SimpleAsset(Component): + """A simple asset that returns a constant string value.""" + + params_schema = SimpleAssetParams + + @classmethod + def generate_files(cls, request: ComponentGenerateRequest, params: SimpleAssetParams) -> None: + generate_component_yaml(request, params.model_dump()) + + @classmethod + def from_decl_node( + cls, context: "ComponentLoadContext", decl_node: "ComponentDeclNode" + ) -> Self: + assert isinstance(decl_node, YamlComponentDecl) + loaded_params = TypeAdapter(cls.params_schema).validate_python( + decl_node.component_file_model.params + ) + return cls( + asset_key=AssetKey.from_user_string(loaded_params.asset_key), + value=loaded_params.value, + ) + + def __init__(self, asset_key: AssetKey, value: str): + self._asset_key = asset_key + self._value = value + + def build_defs(self, context: ComponentLoadContext) -> Definitions: + @asset(key=self._asset_key) + def dummy(context: AssetExecutionContext): + return self._value + + return Definitions(assets=[dummy]) diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/lib/simple_pipes_script_asset.py b/python_modules/libraries/dagster-components/dagster_components_tests/lib/simple_pipes_script_asset.py new file mode 100644 index 0000000000000..a25b6b213d840 --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components_tests/lib/simple_pipes_script_asset.py @@ -0,0 +1,87 @@ +import shutil +from pathlib import Path +from typing import TYPE_CHECKING + +import click +from dagster._core.definitions.asset_key import AssetKey +from dagster._core.definitions.decorators.asset_decorator import asset +from dagster._core.definitions.definitions_class import Definitions +from dagster._core.execution.context.asset_execution_context import AssetExecutionContext +from dagster._core.pipes.subprocess import PipesSubprocessClient +from dagster_components import Component, ComponentLoadContext, component +from dagster_components.core.component import ComponentGenerateRequest +from dagster_components.core.component_decl_builder import YamlComponentDecl +from dagster_components.generate import generate_component_yaml +from pydantic import BaseModel, TypeAdapter +from typing_extensions import Self + +if TYPE_CHECKING: + from 
dagster_components.core.component import ComponentDeclNode + + +# Same schema used for file generation and defs generation +class SimplePipesScriptAssetParams(BaseModel): + asset_key: str + filename: str + + @staticmethod + @click.command + @click.option("--asset-key", type=str) + @click.option("--filename", type=str) + def cli(asset_key: str, filename: str) -> "SimplePipesScriptAssetParams": + return SimplePipesScriptAssetParams(asset_key=asset_key, filename=filename) + + +_SCRIPT_TEMPLATE = """ +from dagster_pipes import open_dagster_pipes + +context = open_dagster_pipes() + +context.log.info("Materializing asset {asset_key} from pipes") +context.report_asset_materialization(asset_key="{asset_key}") +""" + + +@component(name="simple_pipes_script_asset") +class SimplePipesScriptAsset(Component): + """A simple asset that runs a Python script with the Pipes subprocess client. + + Because it is a pipes asset, no value is returned. + """ + + generate_params_schema = SimplePipesScriptAssetParams + params_schema = SimplePipesScriptAssetParams + + @classmethod + def generate_files( + cls, request: ComponentGenerateRequest, params: SimplePipesScriptAssetParams + ) -> None: + generate_component_yaml(request, params.model_dump()) + Path(request.component_instance_root_path, params.filename).write_text( + _SCRIPT_TEMPLATE.format(asset_key=params.asset_key) + ) + + @classmethod + def from_decl_node( + cls, context: "ComponentLoadContext", decl_node: "ComponentDeclNode" + ) -> Self: + assert isinstance(decl_node, YamlComponentDecl) + loaded_params = TypeAdapter(cls.params_schema).validate_python( + decl_node.component_file_model.params + ) + return cls( + asset_key=AssetKey.from_user_string(loaded_params.asset_key), + script_path=decl_node.path / loaded_params.filename, + ) + + def __init__(self, asset_key: AssetKey, script_path: Path): + self._asset_key = asset_key + self._script_path = script_path + + def build_defs(self, context: ComponentLoadContext) -> Definitions: + @asset(key=self._asset_key) + def _asset(context: AssetExecutionContext, pipes_client: PipesSubprocessClient): + cmd = [shutil.which("python"), self._script_path] + return pipes_client.run(command=cmd, context=context).get_results() + + return Definitions(assets=[_asset]) diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/stub_code_locations/dbt_project_location/components/jaffle_shop_dbt/component.yaml b/python_modules/libraries/dagster-components/dagster_components_tests/stub_code_locations/dbt_project_location/components/jaffle_shop_dbt/component.yaml index 85c04ada2fb8d..f36940bd32cd0 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/stub_code_locations/dbt_project_location/components/jaffle_shop_dbt/component.yaml +++ b/python_modules/libraries/dagster-components/dagster_components_tests/stub_code_locations/dbt_project_location/components/jaffle_shop_dbt/component.yaml @@ -3,3 +3,15 @@ type: dagster_components.dbt_project params: dbt: project_dir: jaffle_shop + + asset_attributes: + - tags: + foo: bar + metadata: + something: 1 + automation_condition: + type: on_cron + params: + cron_schedule: "@daily" + - tags: + another: one diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_component_rendering.py b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_component_rendering.py new file mode 100644 index 0000000000000..6e036492363c1 --- /dev/null +++ 
b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_component_rendering.py @@ -0,0 +1,79 @@ +from typing import Optional, Sequence + +import pytest +from dagster_components.core.component_rendering import ( + RenderingScope, + TemplatedValueResolver, + _should_render, + preprocess_value, +) +from pydantic import BaseModel, Field, TypeAdapter + + +class Inner(BaseModel): + a: Optional[str] = None + deferred: Optional[str] = RenderingScope(required_scope={"foo", "bar", "baz"}) + + +class Outer(BaseModel): + a: str + deferred: str = RenderingScope(required_scope={"a"}) + inner: Sequence[Inner] + inner_deferred: Sequence[Inner] = RenderingScope(required_scope={"b"}) + + inner_optional: Optional[Sequence[Inner]] = None + inner_deferred_optional: Optional[Sequence[Inner]] = RenderingScope( + Field(default=None), required_scope={"b"} + ) + + +@pytest.mark.parametrize( + "path,expected", + [ + (["a"], True), + (["deferred"], False), + (["inner", 0, "a"], True), + (["inner", 0, "deferred"], False), + (["inner_deferred", 0, "a"], False), + (["inner_deferred", 0, "deferred"], False), + (["inner_optional"], True), + (["inner_optional", 0, "a"], True), + (["inner_optional", 0, "deferred"], False), + (["inner_deferred_optional", 0], False), + (["inner_deferred_optional", 0, "a"], False), + (["NONEXIST", 0, "deferred"], False), + ], +) +def test_should_render(path, expected: bool) -> None: + assert _should_render(path, Outer.model_json_schema(), Outer.model_json_schema()) == expected + + +def test_render() -> None: + data = { + "a": "{{ foo_val }}", + "deferred": "{{ deferred }}", + "inner": [ + {"a": "{{ bar_val }}", "deferred": "{{ deferred }}"}, + {"a": "zzz", "deferred": "zzz"}, + ], + "inner_deferred": [ + {"a": "{{ deferred }}", "deferred": "zzz"}, + ], + } + + renderer = TemplatedValueResolver(context={"foo_val": "foo", "bar_val": "bar"}) + rendered_data = preprocess_value(renderer, data, Outer) + + assert rendered_data == { + "a": "foo", + "deferred": "{{ deferred }}", + "inner": [ + {"a": "bar", "deferred": "{{ deferred }}"}, + {"a": "zzz", "deferred": "zzz"}, + ], + "inner_deferred": [ + {"a": "{{ deferred }}", "deferred": "zzz"}, + ], + } + + TypeAdapter(Outer).validate_python(rendered_data) diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_pipes_subprocess_script_collection.py b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_pipes_subprocess_script_collection.py index ae4206ef1fe58..69409ae590a51 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_pipes_subprocess_script_collection.py +++ b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_pipes_subprocess_script_collection.py @@ -25,24 +25,32 @@ def test_python_native() -> None: def test_python_params() -> None: - component = PipesSubprocessScriptCollection.from_decl_node( - load_context=script_load_context(), - component_decl=YamlComponentDecl( - path=LOCATION_PATH / "components" / "scripts", - component_file_model=ComponentFileModel( - type="pipes_subprocess_script_collection", - params={ - "scripts": [ - { - "path": "script_one.py", - "assets": [{"key": "a"}, {"key": "b", "deps": ["up1", "up2"]}], - }, - {"path": "subdir/script_three.py", "assets": [{"key": "key_override"}]}, - ] - }, - ), + component_decl = YamlComponentDecl( + path=LOCATION_PATH / "components" / "scripts", + component_file_model=ComponentFileModel( + 
type="pipes_subprocess_script_collection", + params={ + "scripts": [ + { + "path": "script_one.py", + "assets": [ + {"key": "a", "automation_condition": {"type": "eager"}}, + { + "key": "b", + "automation_condition": { + "type": "on_cron", + "params": {"cron_schedule": "@daily"}, + }, + "deps": ["up1", "up2"], + }, + ], + }, + {"path": "subdir/script_three.py", "assets": [{"key": "key_override"}]}, + ] + }, ), ) + component = PipesSubprocessScriptCollection.load(context=script_load_context(component_decl)) assert get_asset_keys(component) == { AssetKey("a"), AssetKey("b"), diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_registry.py b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_registry.py index 0c52535b44096..07f26ca1ed6f8 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_registry.py +++ b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_registry.py @@ -1,16 +1,47 @@ import os import subprocess import sys +import tempfile +import textwrap +from contextlib import contextmanager from pathlib import Path +from typing import Iterator, List, Sequence + +@contextmanager +def _temp_venv(install_args: Sequence[str]) -> Iterator[Path]: + # Create venv + with tempfile.TemporaryDirectory() as tmpdir: + venv_dir = Path(tmpdir) / ".venv" + subprocess.check_call(["uv", "venv", str(venv_dir)]) + python_executable = ( + venv_dir + / ("Scripts" if sys.platform == "win32" else "bin") + / ("python.exe" if sys.platform == "win32" else "python") + ) + subprocess.check_call( + ["uv", "pip", "install", "--python", str(python_executable), *install_args] + ) + yield python_executable + + +COMPONENT_PRINT_SCRIPT = """ from dagster_components import ComponentRegistry +registry = ComponentRegistry.from_entry_point_discovery() +for component_name in list(registry.keys()): + print(component_name) +""" + -def test_components_from_dagster(): - registry = ComponentRegistry.from_entry_point_discovery() - assert registry.has("dagster_components.dbt_project") - assert registry.has("dagster_components.sling_replication") - assert registry.has("dagster_components.pipes_subprocess_script_collection") +def _get_component_types_in_python_environment(python_executable: Path) -> Sequence[str]: + with tempfile.NamedTemporaryFile(mode="w") as f: + f.write(COMPONENT_PRINT_SCRIPT) + f.flush() + result = subprocess.run( + [str(python_executable), f.name], capture_output=True, text=True, check=False + ) + return result.stdout.strip().split("\n") def _find_repo_root(): @@ -22,10 +53,66 @@ def _find_repo_root(): return current -repo_root = _find_repo_root() +def _generate_test_component_source(number: int) -> str: + return textwrap.dedent(f""" + from dagster_components import Component, component + @component(name="test_component_{number}") + class TestComponent{number}(Component): + pass + """) + + +_repo_root = _find_repo_root() + + +def _get_editable_package_root(pkg_name: str) -> str: + possible_locations = [ + _repo_root / "python_modules" / pkg_name, + _repo_root / "python_modules" / "libraries" / pkg_name, + ] + return next(str(loc) for loc in possible_locations if loc.exists()) + + +# ######################## +# ##### TESTS +# ######################## + + +def test_components_from_dagster(): + common_deps: List[str] = [] + for pkg_name in ["dagster", "dagster-pipes"]: + common_deps.extend(["-e", _get_editable_package_root(pkg_name)]) + + components_root = 
_get_editable_package_root("dagster-components") + dbt_root = _get_editable_package_root("dagster-dbt") + embedded_elt_root = _get_editable_package_root("dagster-embedded-elt") + + # No extras + with _temp_venv([*common_deps, "-e", components_root]) as python_executable: + component_types = _get_component_types_in_python_environment(python_executable) + assert "dagster_components.pipes_subprocess_script_collection" in component_types + assert "dagster_components.dbt_project" not in component_types + assert "dagster_components.sling_replication" not in component_types + + with _temp_venv( + [*common_deps, "-e", f"{components_root}[dbt]", "-e", dbt_root] + ) as python_executable: + component_types = _get_component_types_in_python_environment(python_executable) + assert "dagster_components.pipes_subprocess_script_collection" in component_types + assert "dagster_components.dbt_project" in component_types + assert "dagster_components.sling_replication" not in component_types + + with _temp_venv( + [*common_deps, "-e", f"{components_root}[sling]", "-e", embedded_elt_root] + ) as python_executable: + component_types = _get_component_types_in_python_environment(python_executable) + assert "dagster_components.pipes_subprocess_script_collection" in component_types + assert "dagster_components.dbt_project" not in component_types + assert "dagster_components.sling_replication" in component_types + # Our pyproject.toml installs local dagster components -PYPROJECT_TOML = f""" +DAGSTER_FOO_PYPROJECT_TOML = """ [build-system] requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" @@ -35,48 +122,20 @@ def _find_repo_root(): version = "0.1.0" description = "A simple example package" authors = [ - {{ name = "Your Name", email = "your.email@example.com" }} + { name = "Your Name", email = "your.email@example.com" } ] dependencies = [ - "dagster", "dagster-components", - "dagster-dbt", - "dagster-embedded-elt", ] -[tool.uv.sources] -dagster = {{ path = "{repo_root}/python_modules/dagster" }} -dagster-pipes = {{ path = "{repo_root}/python_modules/dagster-pipes" }} -dagster-components = {{ path = "{repo_root}/python_modules/libraries/dagster-components" }} -dagster-dbt = {{ path = "{repo_root}/python_modules/libraries/dagster-dbt" }} -dagster-embedded-elt = {{ path = "{repo_root}/python_modules/libraries/dagster-embedded-elt" }} - [project.entry-points] -"dagster.components" = {{ dagster_foo = "dagster_foo.lib"}} -""" - -TEST_COMPONENT_1 = """ -from dagster_components import Component, component - -@component(name="test_component_1") -class TestComponent1(Component): - pass -""" - -TEST_COMPONENT_2 = """ -from dagster_components import Component, component - -@component(name="test_component_2") -class TestComponent2(Component): - pass +"dagster.components" = { dagster_foo = "dagster_foo.lib"} """ -COMPONENT_PRINT_SCRIPT = """ -from dagster_components import ComponentRegistry +DAGSTER_FOO_LIB_ROOT = f""" +{_generate_test_component_source(1)} -registry = ComponentRegistry.from_entry_point_discovery() -for component_name in list(registry.keys()): - print(component_name) +from dagster_foo.lib.sub import TestComponent2 """ @@ -85,35 +144,29 @@ def test_components_from_third_party_lib(tmpdir): # Create test package that defines some components os.makedirs("dagster-foo") with open("dagster-foo/pyproject.toml", "w") as f: - f.write(PYPROJECT_TOML) + f.write(DAGSTER_FOO_PYPROJECT_TOML) os.makedirs("dagster-foo/dagster_foo/lib/sub") with open("dagster-foo/dagster_foo/lib/__init__.py", "w") as f: - 
f.write(TEST_COMPONENT_1) + f.write(DAGSTER_FOO_LIB_ROOT) with open("dagster-foo/dagster_foo/lib/sub/__init__.py", "w") as f: - f.write(TEST_COMPONENT_2) - - # Create venv - venv_dir = Path(".venv") - subprocess.check_call(["uv", "venv", str(venv_dir)]) - python_executable = ( - venv_dir - / ("Scripts" if sys.platform == "win32" else "bin") - / ("python.exe" if sys.platform == "win32" else "python") - ) - - # Script to print components - with open("print_components.py", "w") as f: - f.write(COMPONENT_PRINT_SCRIPT) - - # subprocess.check_call([pip_executable, "install", "-e", "dagster-foo"]) - subprocess.check_call( - ["uv", "pip", "install", "--python", str(python_executable), "-e", "dagster-foo"] - ) - result = subprocess.run( - [python_executable, "print_components.py"], capture_output=True, text=True, check=False - ) - assert "dagster_foo.test_component_1" in result.stdout - assert "dagster_foo.test_component_2" in result.stdout + f.write(_generate_test_component_source(2)) + + # Need pipes because dependency of dagster + deps = [ + "-e", + _get_editable_package_root("dagster"), + "-e", + _get_editable_package_root("dagster-components"), + "-e", + _get_editable_package_root("dagster-pipes"), + "-e", + "dagster-foo", + ] + + with _temp_venv(deps) as python_executable: + component_types = _get_component_types_in_python_environment(python_executable) + assert "dagster_foo.test_component_1" in component_types + assert "dagster_foo.test_component_2" in component_types diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_spec_processing.py b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_spec_processing.py new file mode 100644 index 0000000000000..6af75bf8544e4 --- /dev/null +++ b/python_modules/libraries/dagster-components/dagster_components_tests/unit_tests/test_spec_processing.py @@ -0,0 +1,63 @@ +import pytest +from dagster import AssetKey, AssetSpec, Definitions +from dagster_components.core.dsl_schema import AssetAttributes, MergeAttributes, ReplaceAttributes +from pydantic import BaseModel, TypeAdapter + + +class M(BaseModel): + asset_attributes: AssetAttributes = [] + + +defs = Definitions( + assets=[ + AssetSpec("a", group_name="g1"), + AssetSpec("b", group_name="g2"), + AssetSpec("c", group_name="g2", tags={"tag": "val"}), + ], +) + + +def test_replace_attributes() -> None: + op = ReplaceAttributes(operation="replace", target="group:g2", tags={"newtag": "newval"}) + + newdefs = op.apply(defs) + asset_graph = newdefs.get_asset_graph() + assert asset_graph.get(AssetKey("a")).tags == {} + assert asset_graph.get(AssetKey("b")).tags == {"newtag": "newval"} + assert asset_graph.get(AssetKey("c")).tags == {"newtag": "newval"} + + +def test_merge_attributes() -> None: + op = MergeAttributes(operation="merge", target="group:g2", tags={"newtag": "newval"}) + + newdefs = op.apply(defs) + asset_graph = newdefs.get_asset_graph() + assert asset_graph.get(AssetKey("a")).tags == {} + assert asset_graph.get(AssetKey("b")).tags == {"newtag": "newval"} + assert asset_graph.get(AssetKey("c")).tags == {"tag": "val", "newtag": "newval"} + + +@pytest.mark.parametrize( + "python,expected", + [ + # default to merge and a * target + ({"tags": {"a": "b"}}, MergeAttributes(target="*", tags={"a": "b"})), + ( + {"operation": "replace", "tags": {"a": "b"}}, + ReplaceAttributes(operation="replace", target="*", tags={"a": "b"}), + ), + # explicit target + ( + {"tags": {"a": "b"}, "target": "group:g2"}, + 
MergeAttributes(target="group:g2", tags={"a": "b"}), + ), + ( + {"operation": "replace", "tags": {"a": "b"}, "target": "group:g2"}, + ReplaceAttributes(operation="replace", target="group:g2", tags={"a": "b"}), + ), + ], +) +def test_load_attributes(python, expected) -> None: + loaded = TypeAdapter(AssetAttributes).validate_python([python]) + assert len(loaded) == 1 + assert loaded[0] == expected diff --git a/python_modules/libraries/dagster-components/dagster_components_tests/utils.py b/python_modules/libraries/dagster-components/dagster_components_tests/utils.py index 51bce9a14ea32..9dd4669a4fe8f 100644 --- a/python_modules/libraries/dagster-components/dagster_components_tests/utils.py +++ b/python_modules/libraries/dagster-components/dagster_components_tests/utils.py @@ -1,16 +1,28 @@ +import textwrap +from contextlib import contextmanager +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import AbstractSet, Iterator, Optional + from dagster import AssetKey, DagsterInstance -from dagster_components.core.component import Component, ComponentLoadContext, ComponentRegistry +from dagster._utils import pushd +from dagster_components.core.component import ( + Component, + ComponentDeclNode, + ComponentLoadContext, + ComponentRegistry, +) def registry() -> ComponentRegistry: return ComponentRegistry.from_entry_point_discovery() -def script_load_context() -> ComponentLoadContext: - return ComponentLoadContext(registry=registry(), resources={}) +def script_load_context(decl_node: Optional[ComponentDeclNode] = None) -> ComponentLoadContext: + return ComponentLoadContext.for_test(registry=registry(), decl_node=decl_node) -def get_asset_keys(component: Component) -> set[AssetKey]: +def get_asset_keys(component: Component) -> AbstractSet[AssetKey]: return { key for key in component.build_defs(ComponentLoadContext.for_test()) @@ -26,3 +38,45 @@ def assert_assets(component: Component, expected_assets: int) -> None: instance=DagsterInstance.ephemeral() ) assert result.success + + +def generate_component_lib_pyproject_toml(name: str, is_code_location: bool = False) -> str: + pkg_name = name.replace("-", "_") + base = textwrap.dedent(f""" + [build-system] + requires = ["setuptools", "wheel"] + build-backend = "setuptools.build_meta" + + [project] + name = "{name}" + version = "0.1.0" + dependencies = [ + "dagster-components", + ] + + [project.entry-points] + "dagster.components" = {{ {pkg_name} = "{pkg_name}.lib"}} + """) + if is_code_location: + return base + textwrap.dedent(""" + [tool.dagster] + module_name = "{ pkg_name }.definitions" + project_name = "{ pkg_name }" + """) + else: + return base + + +@contextmanager +def temp_code_location_bar() -> Iterator[None]: + with TemporaryDirectory() as tmpdir, pushd(tmpdir): + Path("bar/bar/lib").mkdir(parents=True) + Path("bar/bar/components").mkdir(parents=True) + with open("bar/pyproject.toml", "w") as f: + f.write(generate_component_lib_pyproject_toml("bar", is_code_location=True)) + Path("bar/bar/__init__.py").touch() + Path("bar/bar/definitions.py").touch() + Path("bar/bar/lib/__init__.py").touch() + + with pushd("bar"): + yield diff --git a/python_modules/libraries/dagster-components/setup.py b/python_modules/libraries/dagster-components/setup.py index 903e3fa1c9f7b..1c89e6f5f31f4 100644 --- a/python_modules/libraries/dagster-components/setup.py +++ b/python_modules/libraries/dagster-components/setup.py @@ -36,7 +36,7 @@ def get_version() -> str: ], packages=find_packages(exclude=["dagster_components_tests*", "examples*"]), 
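# A short sketch of what an asset_attributes entry (like the ones in the jaffle_shop_dbt
# component.yaml above) does when applied in Python: a MergeAttributes processor rewrites
# the matched specs on a Definitions object, as exercised in the spec-processing tests
# above. The asset name "orders" is illustrative.
from dagster import AssetKey, AssetSpec, Definitions
from dagster_components.core.dsl_schema import MergeAttributes

defs = Definitions(assets=[AssetSpec("orders", group_name="g2")])
processor = MergeAttributes(target="*", tags={"foo": "bar"})
defs = processor.apply(defs)
assert defs.get_asset_graph().get(AssetKey("orders")).tags == {"foo": "bar"}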
install_requires=[ - f"dagster{pin}", + "dagster>=1.9.5", "tomli", ], zip_safe=False, @@ -46,6 +46,7 @@ def get_version() -> str: ], "dagster.components": [ "dagster_components = dagster_components.lib", + "dagster_components.test = dagster_components_tests.lib", ], }, extras_require={ diff --git a/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py b/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py index afbd41a4a8786..9a5c6656c6c44 100644 --- a/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py +++ b/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py @@ -374,7 +374,7 @@ def wait_for_run_to_complete( logger: logging.Logger, databricks_run_id: int, poll_interval_sec: float, - max_wait_time_sec: int, + max_wait_time_sec: float, verbose_logs: bool = True, ) -> None: logger.info(f"Waiting for Databricks run `{databricks_run_id}` to complete...") @@ -413,7 +413,7 @@ def __init__( azure_client_secret: Optional[str] = None, azure_tenant_id: Optional[str] = None, poll_interval_sec: float = 5, - max_wait_time_sec: int = DEFAULT_RUN_MAX_WAIT_TIME_SEC, + max_wait_time_sec: float = DEFAULT_RUN_MAX_WAIT_TIME_SEC, ): self.host = check.opt_str_param(host, "host") self.token = check.opt_str_param(token, "token") diff --git a/python_modules/libraries/dagster-databricks/dagster_databricks/ops.py b/python_modules/libraries/dagster-databricks/dagster_databricks/ops.py index 56d70045daae9..bb0afabb36e54 100644 --- a/python_modules/libraries/dagster-databricks/dagster_databricks/ops.py +++ b/python_modules/libraries/dagster-databricks/dagster_databricks/ops.py @@ -93,7 +93,7 @@ class DatabricksRunNowOpConfig(Config): default=_poll_interval_seconds, description="Check whether the Databricks Job is done at this interval, in seconds.", ) - max_wait_time_seconds: int = Field( + max_wait_time_seconds: float = Field( default=_max_wait_time_seconds, description=( "If the Databricks Job is not complete after this length of time, in seconds," @@ -205,7 +205,7 @@ class DatabricksSubmitRunOpConfig(Config): default=_poll_interval_seconds, description="Check whether the Databricks Job is done at this interval, in seconds.", ) - max_wait_time_seconds: int = Field( + max_wait_time_seconds: float = Field( default=_max_wait_time_seconds, description=( "If the Databricks Job is not complete after this length of time, in seconds," diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/asset_utils.py b/python_modules/libraries/dagster-dbt/dagster_dbt/asset_utils.py index 5b2d8f9cb7d5b..68fdaea3e2629 100644 --- a/python_modules/libraries/dagster-dbt/dagster_dbt/asset_utils.py +++ b/python_modules/libraries/dagster-dbt/dagster_dbt/asset_utils.py @@ -573,7 +573,7 @@ def default_description_fn(dbt_resource_props: Mapping[str, Any], display_raw_sq or f"dbt {dbt_resource_props['resource_type']} {dbt_resource_props['name']}", ] if display_raw_sql: - description_sections.append(f"#### Raw SQL:\n```\n{code_block}\n```") + description_sections.append(f"#### Raw SQL:\n```sql\n{code_block}\n```") return "\n\n".join(filter(None, description_sections)) diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/core/dbt_event_iterator.py b/python_modules/libraries/dagster-dbt/dagster_dbt/core/dbt_event_iterator.py index 508e90c00b95e..c3364a133006f 100644 --- a/python_modules/libraries/dagster-dbt/dagster_dbt/core/dbt_event_iterator.py +++ b/python_modules/libraries/dagster-dbt/dagster_dbt/core/dbt_event_iterator.py @@ -1,6 +1,5 @@ 
-from collections import abc from concurrent.futures import ThreadPoolExecutor -from typing import TYPE_CHECKING, Any, Callable, Dict, Generic, Iterator, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, Optional, Union, cast from dagster import ( AssetCheckResult, @@ -186,7 +185,7 @@ def _fetch_row_count_metadata( return None -class DbtEventIterator(Generic[T], abc.Iterator): +class DbtEventIterator(Iterator[T]): """A wrapper around an iterator of dbt events which contains additional methods for post-processing the events, such as fetching row counts for materialized tables. """ diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/dbt_core_version.py b/python_modules/libraries/dagster-dbt/dagster_dbt/dbt_core_version.py index dde70ceb0fadb..0745c1b40c0cf 100644 --- a/python_modules/libraries/dagster-dbt/dagster_dbt/dbt_core_version.py +++ b/python_modules/libraries/dagster-dbt/dagster_dbt/dbt_core_version.py @@ -1 +1 @@ -DBT_CORE_VERSION_UPPER_BOUND = "1.9" +DBT_CORE_VERSION_UPPER_BOUND = "1.10" diff --git a/python_modules/libraries/dagster-deltalake/dagster_deltalake/io_manager.py b/python_modules/libraries/dagster-deltalake/dagster_deltalake/io_manager.py index 17ce19d31c1af..131e1475a260f 100644 --- a/python_modules/libraries/dagster-deltalake/dagster_deltalake/io_manager.py +++ b/python_modules/libraries/dagster-deltalake/dagster_deltalake/io_manager.py @@ -125,11 +125,13 @@ def my_table_a(my_table: pd.DataFrame): root_uri: str = Field(description="Storage location where Delta tables are stored.") mode: WriteMode = Field( - default=WriteMode.overwrite.value, description="The write mode passed to save the output." + default=WriteMode.overwrite.value, # type: ignore + description="The write mode passed to save the output.", ) overwrite_schema: bool = Field(default=False) writer_engine: WriterEngine = Field( - default=WriterEngine.pyarrow.value, description="Engine passed to write_deltalake." + default=WriterEngine.pyarrow.value, # type: ignore + description="Engine passed to write_deltalake.", ) storage_options: Union[AzureConfig, S3Config, LocalConfig, GcsConfig] = Field( diff --git a/python_modules/libraries/dagster-dg/dagster_dg/cache.py b/python_modules/libraries/dagster-dg/dagster_dg/cache.py new file mode 100644 index 0000000000000..71afef32046e9 --- /dev/null +++ b/python_modules/libraries/dagster-dg/dagster_dg/cache.py @@ -0,0 +1,74 @@ +import shutil +import sys +from pathlib import Path +from typing import Final, Literal, Optional, Tuple + +from typing_extensions import Self, TypeAlias + +from dagster_dg.config import DgConfig + +_CACHE_CONTAINER_DIR_NAME: Final = "dg-cache" + +CachableDataType: TypeAlias = Literal["component_registry_data"] + + +def get_default_cache_dir() -> Path: + if sys.platform == "win32": + return Path.home() / "AppData" / "dg" / "cache" + elif sys.platform == "darwin": + return Path.home() / "Library" / "Caches" / "dg" + else: + return Path.home() / ".cache" / "dg" + + +class DgCache: + @classmethod + def from_default(cls) -> Self: + return cls.from_parent_path(get_default_cache_dir()) + + @classmethod + def from_config(cls, config: DgConfig) -> Self: + return cls.from_parent_path( + parent_path=config.cache_dir, + logging_enabled=config.verbose, + ) + + # This is the preferred constructor to use when creating a cache. It ensures that all data is + # stored inside an additional container directory inside the user-specified cache directory. 
+ # When we clear the cache, we only delete this container directory. This is to avoid accidents + # when the user mistakenly specifies a cache directory that contains other data. + @classmethod + def from_parent_path(cls, parent_path: Path, logging_enabled: bool = False) -> Self: + root_path = parent_path / _CACHE_CONTAINER_DIR_NAME + return cls(root_path, logging_enabled) + + def __init__(self, root_path: Path, logging_enabled: bool): + self._root_path = root_path + self._root_path.mkdir(parents=True, exist_ok=True) + self._logging_enabled = logging_enabled + + def clear(self) -> None: + shutil.rmtree(self._root_path) + self.log(f"CACHE [clear]: {self._root_path}") + + def get(self, key: Tuple[str, ...]) -> Optional[str]: + path = self._get_path(key) + if path.exists(): + self.log(f"CACHE [hit]: {path}") + return path.read_text() + else: + self.log(f"CACHE [miss]: {path}") + return None + + def set(self, key: Tuple[str, ...], value: str) -> None: + path = self._get_path(key) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(value) + self.log(f"CACHE [write]: {path}") + + def _get_path(self, key: Tuple[str, ...]) -> Path: + return Path(self._root_path, *key) + + def log(self, message: str) -> None: + if self._logging_enabled: + print(message) # noqa: T201 diff --git a/python_modules/libraries/dagster-dg/dagster_dg/cli/__init__.py b/python_modules/libraries/dagster-dg/dagster_dg/cli/__init__.py index b2fbe3c69f6fc..d8cd4041ec23c 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/cli/__init__.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/cli/__init__.py @@ -1,23 +1,85 @@ +from pathlib import Path + import click +from dagster_dg.cache import DgCache from dagster_dg.cli.generate import generate_cli +from dagster_dg.cli.info import info_cli from dagster_dg.cli.list import list_cli +from dagster_dg.config import DgConfig, set_config_on_cli_context from dagster_dg.version import __version__ def create_dg_cli(): commands = { "generate": generate_cli, + "info": info_cli, "list": list_cli, } + # Defaults are defined on the DgConfig object. 
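# A brief usage sketch of the DgCache defined above; the cache directory and key segments
# are illustrative. Keys are tuples of path segments and values are plain strings
# (typically serialized component registry data).
import tempfile
from pathlib import Path

from dagster_dg.cache import DgCache

with tempfile.TemporaryDirectory() as tmpdir:
    cache = DgCache.from_parent_path(Path(tmpdir), logging_enabled=True)
    key = ("example_environment", "component_registry_data")
    if cache.get(key) is None:
        cache.set(key, '{"dagster_components.dbt_project": {}}')
    assert cache.get(key) is not None
    cache.clear()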
@click.group( commands=commands, context_settings={"max_content_width": 120, "help_option_names": ["-h", "--help"]}, + invoke_without_command=True, + ) + @click.option( + "--builtin-component-lib", + type=str, + default=DgConfig.builtin_component_lib, + help="Specify a builitin component library to use.", + ) + @click.option( + "--verbose", + is_flag=True, + default=DgConfig.verbose, + help="Enable verbose output for debugging.", + ) + @click.option( + "--disable-cache", + is_flag=True, + default=DgConfig.disable_cache, + help="Disable caching of component registry data.", + ) + @click.option( + "--clear-cache", + is_flag=True, + help="Clear the cache before running the command.", + default=False, + ) + @click.option( + "--cache-dir", + type=Path, + default=DgConfig.cache_dir, + help="Specify a directory to use for the cache.", ) @click.version_option(__version__, "--version", "-v") - def group(): - """CLI tools for working with Dagster.""" + @click.pass_context + def group( + context: click.Context, + builtin_component_lib: str, + verbose: bool, + disable_cache: bool, + cache_dir: Path, + clear_cache: bool, + ): + """CLI tools for working with Dagster components.""" + context.ensure_object(dict) + config = DgConfig( + builtin_component_lib=builtin_component_lib, + verbose=verbose, + disable_cache=disable_cache, + cache_dir=cache_dir, + ) + if clear_cache: + DgCache.from_config(config).clear() + if context.invoked_subcommand is None: + context.exit(0) + elif context.invoked_subcommand is None: + click.echo(context.get_help()) + context.exit(0) + + set_config_on_cli_context(context, config) return group diff --git a/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py b/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py index 5034c1b5943fd..e8111606e41a5 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py @@ -6,10 +6,11 @@ import click from dagster_dg.context import ( - CodeLocationProjectContext, - DeploymentProjectContext, - is_inside_code_location_project, - is_inside_deployment_project, + CodeLocationDirectoryContext, + DeploymentDirectoryContext, + DgContext, + is_inside_code_location_directory, + is_inside_deployment_directory, ) from dagster_dg.generate import ( generate_code_location, @@ -25,9 +26,13 @@ def generate_cli() -> None: @generate_cli.command(name="deployment") -@click.argument("path", type=str) -def generate_deployment_command(path: str) -> None: - """Generate a Dagster deployment instance.""" +@click.argument("path", type=Path) +def generate_deployment_command(path: Path) -> None: + """Generate a Dagster deployment file structure. + + The deployment file structure includes a directory for code locations and configuration files + for deploying to Dagster Plus. 
+ """ dir_abspath = os.path.abspath(path) if os.path.exists(dir_abspath): click.echo( @@ -40,20 +45,57 @@ def generate_deployment_command(path: str) -> None: @generate_cli.command(name="code-location") @click.argument("name", type=str) -@click.option("--use-editable-dagster", is_flag=True, default=False) -def generate_code_location_command(name: str, use_editable_dagster: bool) -> None: - """Generate a Dagster code location inside a component.""" - if is_inside_deployment_project(Path.cwd()): - context = DeploymentProjectContext.from_path(Path.cwd()) +@click.option( + "--use-editable-dagster", + type=str, + flag_value="TRUE", + is_flag=False, + default=None, + help=( + "Install Dagster package dependencies from a local Dagster clone. Accepts a path to local Dagster clone root or" + " may be set as a flag (no value is passed). If set as a flag," + " the location of the local Dagster clone will be read from the `DAGSTER_GIT_REPO_DIR` environment variable." + ), +) +@click.pass_context +def generate_code_location_command( + cli_context: click.Context, name: str, use_editable_dagster: Optional[str] +) -> None: + """Generate a Dagster code location file structure and a uv-managed virtual environment scoped + to the code location. + + This command can be run inside or outside of a deployment directory. If run inside a deployment, + the code location will be created within the deployment directory's code location directory. + + The code location file structure defines a Python package with some pre-existing internal + structure: + + ├── + │ ├── __init__.py + │ ├── components + │ ├── definitions.py + │ └── lib + │ └── __init__.py + ├── _tests + │ └── __init__.py + └── pyproject.toml + + The `.components` directory holds components (which can be created with `dg generate + component`). The `.lib` directory holds custom component types scoped to the code + location (which can be created with `dg generate component-type`). 
+ """ + dg_context = DgContext.from_cli_context(cli_context) + if is_inside_deployment_directory(Path.cwd()): + context = DeploymentDirectoryContext.from_path(Path.cwd(), dg_context) if context.has_code_location(name): click.echo(click.style(f"A code location named {name} already exists.", fg="red")) sys.exit(1) - code_location_path = os.path.join(context.code_location_root_path, name) + code_location_path = context.code_location_root_path / name else: - code_location_path = os.path.join(Path.cwd(), name) + code_location_path = Path.cwd() / name - if use_editable_dagster: - if "DAGSTER_GIT_REPO_DIR" not in os.environ: + if use_editable_dagster == "TRUE": + if not os.environ.get("DAGSTER_GIT_REPO_DIR"): click.echo( click.style( "The `--use-editable-dagster` flag requires the `DAGSTER_GIT_REPO_DIR` environment variable to be set.", @@ -62,6 +104,8 @@ def generate_code_location_command(name: str, use_editable_dagster: bool) -> Non ) sys.exit(1) editable_dagster_root = os.environ["DAGSTER_GIT_REPO_DIR"] + elif use_editable_dagster: # a string value was passed + editable_dagster_root = use_editable_dagster else: editable_dagster_root = None @@ -70,44 +114,80 @@ def generate_code_location_command(name: str, use_editable_dagster: bool) -> Non @generate_cli.command(name="component-type") @click.argument("name", type=str) -def generate_component_type_command(name: str) -> None: - """Generate a Dagster component instance.""" - if not is_inside_code_location_project(Path.cwd()): +@click.pass_context +def generate_component_type_command(cli_context: click.Context, name: str) -> None: + """Generate a scaffold of a custom Dagster component type. + + This command must be run inside a Dagster code location directory. The component type scaffold + will be generated in submodule `.lib.`. + """ + dg_context = DgContext.from_cli_context(cli_context) + if not is_inside_code_location_directory(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) - context = CodeLocationProjectContext.from_path(Path.cwd()) + context = CodeLocationDirectoryContext.from_path(Path.cwd(), dg_context) full_component_name = f"{context.name}.{name}" if context.has_component_type(full_component_name): click.echo(click.style(f"A component type named `{name}` already exists.", fg="red")) sys.exit(1) - generate_component_type(context.component_types_root_path, name) + generate_component_type(context, name) @generate_cli.command(name="component") -@click.argument("component_type", type=str) +@click.argument( + "component_type", + type=str, +) @click.argument("component_name", type=str) -@click.option("--json-params", type=str, default=None) +@click.option("--json-params", type=str, default=None, help="JSON string of component parameters.") @click.argument("extra_args", nargs=-1, type=str) +@click.pass_context def generate_component_command( + cli_context: click.Context, component_type: str, component_name: str, json_params: Optional[str], extra_args: Tuple[str, ...], ) -> None: - if not is_inside_code_location_project(Path.cwd()): + """Generate a scaffold of a Dagster component. + + This command must be run inside a Dagster code location directory. The component scaffold will be + generated in submodule `.components.`. + + The COMPONENT_TYPE must be a registered component type in the code location environment. 
+ You can view all registered component types with `dg list component-types`. The COMPONENT_NAME + will be used to name the submodule created under .components. + + Components can optionally be passed generate parameters. There are two ways to do this: + + - Passing --json-params with a JSON string of parameters. For example: + + dg generate component foo.bar my_component --json-params '{"param1": "value", "param2": "value"}'`. + + - Passing key-value pairs as space-separated EXTRA_ARGS after `--`. For example: + + dg generate component foo.bar my_component -- param1=value param2=value + + When key-value pairs are used, the value type will be inferred from the + underlying component generation schema. + + It is an error to pass both --json-params and EXTRA_ARGS. + """ + dg_context = DgContext.from_cli_context(cli_context) + if not is_inside_code_location_directory(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) - context = CodeLocationProjectContext.from_path(Path.cwd()) + context = CodeLocationDirectoryContext.from_path(Path.cwd(), dg_context) if not context.has_component_type(component_type): click.echo( click.style(f"No component type `{component_type}` could be resolved.", fg="red") @@ -119,10 +199,21 @@ def generate_component_command( ) sys.exit(1) + if json_params is not None and extra_args: + click.echo( + click.style( + "Detected both --json-params and EXTRA_ARGS. These are mutually exclusive means of passing" + " component generation parameters. Use only one.", + fg="red", + ) + ) + sys.exit(1) + generate_component_instance( - context.component_instances_root_path, + Path(context.component_instances_root_path), component_name, component_type, json_params, extra_args, + dg_context, ) diff --git a/python_modules/libraries/dagster-dg/dagster_dg/cli/info.py b/python_modules/libraries/dagster-dg/dagster_dg/cli/info.py new file mode 100644 index 0000000000000..f509963606b3d --- /dev/null +++ b/python_modules/libraries/dagster-dg/dagster_dg/cli/info.py @@ -0,0 +1,92 @@ +import json +import sys +from pathlib import Path +from typing import Any, Mapping + +import click + +from dagster_dg.context import ( + CodeLocationDirectoryContext, + DgContext, + is_inside_code_location_directory, +) + + +@click.group(name="info") +def info_cli(): + """Commands for listing Dagster components and related entities.""" + + +def _serialize_json_schema(schema: Mapping[str, Any]) -> str: + return json.dumps(schema, indent=4) + + +@info_cli.command(name="component-type") +@click.argument("component_type", type=str) +@click.option("--description", is_flag=True, default=False) +@click.option("--generate-params-schema", is_flag=True, default=False) +@click.option("--component-params-schema", is_flag=True, default=False) +@click.pass_context +def info_component_type_command( + cli_context: click.Context, + component_type: str, + description: bool, + generate_params_schema: bool, + component_params_schema: bool, +) -> None: + """Get detailed information on a registered Dagster component type.""" + dg_context = DgContext.from_cli_context(cli_context) + if not is_inside_code_location_directory(Path.cwd()): + click.echo( + click.style( + "This command must be run inside a Dagster code location directory.", fg="red" + ) + ) + sys.exit(1) + + context = CodeLocationDirectoryContext.from_path(Path.cwd(), dg_context) + if not 
context.has_component_type(component_type): + click.echo( + click.style(f"No component type `{component_type}` could be resolved.", fg="red") + ) + sys.exit(1) + + if sum([description, generate_params_schema, component_params_schema]) > 1: + click.echo( + click.style( + "Only one of --description, --generate-params-schema, and --component-params-schema can be specified.", + fg="red", + ) + ) + sys.exit(1) + + component_type_metadata = context.get_component_type(component_type) + + if description: + if component_type_metadata.description: + click.echo(component_type_metadata.description) + else: + click.echo("No description available.") + elif generate_params_schema: + if component_type_metadata.generate_params_schema: + click.echo(_serialize_json_schema(component_type_metadata.generate_params_schema)) + else: + click.echo("No generate params schema defined.") + elif component_params_schema: + if component_type_metadata.component_params_schema: + click.echo(_serialize_json_schema(component_type_metadata.component_params_schema)) + else: + click.echo("No component params schema defined.") + + # print all available metadata + else: + click.echo(component_type) + if component_type_metadata.description: + click.echo("\nDescription:\n") + click.echo(component_type_metadata.description) + if component_type_metadata.generate_params_schema: + click.echo("\nGenerate params schema:\n") + click.echo(_serialize_json_schema(component_type_metadata.generate_params_schema)) + if component_type_metadata.component_params_schema: + click.echo("\nComponent params schema:\n") + click.echo(_serialize_json_schema(component_type_metadata.component_params_schema)) diff --git a/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py b/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py index 1cb7f0a91d2de..055c5a827079f 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py @@ -4,10 +4,11 @@ import click from dagster_dg.context import ( - CodeLocationProjectContext, - DeploymentProjectContext, - is_inside_code_location_project, - is_inside_deployment_project, + CodeLocationDirectoryContext, + DeploymentDirectoryContext, + DgContext, + is_inside_code_location_directory, + is_inside_deployment_directory, ) @@ -17,46 +18,54 @@ def list_cli(): @list_cli.command(name="code-locations") -def list_code_locations_command() -> None: +@click.pass_context +def list_code_locations_command(cli_context: click.Context) -> None: """List code locations in the current deployment.""" - if not is_inside_deployment_project(Path.cwd()): + dg_context = DgContext.from_cli_context(cli_context) + if not is_inside_deployment_directory(Path.cwd()): click.echo( - click.style("This command must be run inside a Dagster deployment project.", fg="red") + click.style("This command must be run inside a Dagster deployment directory.", fg="red") ) sys.exit(1) - context = DeploymentProjectContext.from_path(Path.cwd()) - for code_location in context.list_code_locations(): + context = DeploymentDirectoryContext.from_path(Path.cwd(), dg_context) + for code_location in context.get_code_location_names(): click.echo(code_location) @list_cli.command(name="component-types") -def list_component_types_command() -> None: - """List registered Dagster components.""" - if not is_inside_code_location_project(Path.cwd()): +@click.pass_context +def list_component_types_command(cli_context: click.Context) -> None: + """List registered Dagster components in the current code 
location environment.""" + dg_context = DgContext.from_cli_context(cli_context) + if not is_inside_code_location_directory(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) - context = CodeLocationProjectContext.from_path(Path.cwd()) - for component_type in context.list_component_types(): - click.echo(component_type) + context = CodeLocationDirectoryContext.from_path(Path.cwd(), dg_context) + for key, component_type in context.iter_component_types(): + click.echo(key) + if component_type.summary: + click.echo(f" {component_type.summary}") @list_cli.command(name="components") -def list_components_command() -> None: - """List Dagster component instances in a code location.""" - if not is_inside_code_location_project(Path.cwd()): +@click.pass_context +def list_components_command(cli_context: click.Context) -> None: + """List Dagster component instances defined in the current code location.""" + dg_context = DgContext.from_cli_context(cli_context) + if not is_inside_code_location_directory(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) - context = CodeLocationProjectContext.from_path(Path.cwd()) - for component_name in context.component_instances: + context = CodeLocationDirectoryContext.from_path(Path.cwd(), dg_context) + for component_name in context.get_component_instance_names(): click.echo(component_name) diff --git a/python_modules/libraries/dagster-dg/dagster_dg/component.py b/python_modules/libraries/dagster-dg/dagster_dg/component.py index 129c4d6c388ad..304cb45dd497d 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/component.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/component.py @@ -1,16 +1,20 @@ import copy from dataclasses import dataclass -from typing import Any, Dict, Iterable, Mapping +from typing import Any, Dict, Iterable, Mapping, Optional @dataclass class RemoteComponentType: name: str + package: str + summary: Optional[str] + description: Optional[str] + generate_params_schema: Optional[Mapping[str, Any]] # json schema + component_params_schema: Optional[Mapping[str, Any]] # json schema @property def key(self) -> str: return self.name - # return f"{self.package}.{self.name}" class RemoteComponentRegistry: diff --git a/python_modules/libraries/dagster-dg/dagster_dg/config.py b/python_modules/libraries/dagster-dg/dagster_dg/config.py new file mode 100644 index 0000000000000..d5a7abd9adaa4 --- /dev/null +++ b/python_modules/libraries/dagster-dg/dagster_dg/config.py @@ -0,0 +1,60 @@ +import sys +from dataclasses import dataclass +from pathlib import Path + +import click +from typing_extensions import Self + +from dagster_dg.error import DgError + +DEFAULT_BUILTIN_COMPONENT_LIB = "dagster_components" + + +def _get_default_cache_dir() -> Path: + if sys.platform == "win32": + return Path.home() / "AppData" / "dg" / "cache" + elif sys.platform == "darwin": + return Path.home() / "Library" / "Caches" / "dg" + else: + return Path.home() / ".cache" / "dg" + + +DEFAULT_CACHE_DIR = _get_default_cache_dir() + + +@dataclass +class DgConfig: + """Global configuration for Dg. + + Attributes: + disable_cache (bool): If True, disable caching. Defaults to False. + cache_dir (Optional[str]): The directory to use for caching. 
If None, the default cache will + be used. + verbose (bool): If True, log debug information. + builtin_component_lib (str): The name of the builtin component library to load. + """ + + disable_cache: bool = False + cache_dir: Path = DEFAULT_CACHE_DIR + verbose: bool = False + builtin_component_lib: str = DEFAULT_BUILTIN_COMPONENT_LIB + + @classmethod + def from_cli_context(cls, cli_context: click.Context) -> Self: + if _CLI_CONTEXT_CONFIG_KEY not in cli_context.obj: + raise DgError( + f"Attempted to extract DgConfig from CLI context but nothing stored under designated key `{_CLI_CONTEXT_CONFIG_KEY}`." + ) + return cli_context.obj[_CLI_CONTEXT_CONFIG_KEY] + + @classmethod + def default(cls) -> "DgConfig": + return cls() + + +_CLI_CONTEXT_CONFIG_KEY = "config" + + +def set_config_on_cli_context(cli_context: click.Context, config: DgConfig) -> None: + cli_context.ensure_object(dict) + cli_context.obj[_CLI_CONTEXT_CONFIG_KEY] = config diff --git a/python_modules/libraries/dagster-dg/dagster_dg/context.py b/python_modules/libraries/dagster-dg/dagster_dg/context.py index ee03ce9ab7552..af25fcd73ff06 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/context.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/context.py @@ -1,55 +1,64 @@ +import hashlib import json import os +from dataclasses import dataclass from pathlib import Path -from typing import Final, Iterable, Mapping, Optional, Sequence +from typing import Final, Iterable, Optional, Tuple +import click import tomli from typing_extensions import Self +from dagster_dg.cache import CachableDataType, DgCache from dagster_dg.component import RemoteComponentRegistry, RemoteComponentType +from dagster_dg.config import DgConfig from dagster_dg.error import DgError -from dagster_dg.utils import execute_code_location_command +from dagster_dg.utils import ( + execute_code_location_command, + hash_directory_metadata, + hash_file_metadata, +) -def is_inside_deployment_project(path: Path) -> bool: +def is_inside_deployment_directory(path: Path) -> bool: try: - _resolve_deployment_root_path(path) + _resolve_deployment_root_directory(path) return True except DgError: return False -def _resolve_deployment_root_path(path: Path) -> Path: +def _resolve_deployment_root_directory(path: Path) -> Path: current_path = path.absolute() - while not _is_deployment_root(current_path): + while not _is_deployment_root_directory(current_path): current_path = current_path.parent if str(current_path) == "/": raise DgError("Cannot find deployment root") return current_path -def is_inside_code_location_project(path: Path) -> bool: +def _is_deployment_root_directory(path: Path) -> bool: + return (path / "code_locations").exists() + + +def is_inside_code_location_directory(path: Path) -> bool: try: - _resolve_code_location_root_path(path) + _resolve_code_location_root_directory(path) return True except DgError: return False -def _resolve_code_location_root_path(path: Path) -> Path: +def _resolve_code_location_root_directory(path: Path) -> Path: current_path = path.absolute() - while not _is_code_location_root(current_path): + while not _is_code_location_root_directory(current_path): current_path = current_path.parent if str(current_path) == "/": raise DgError("Cannot find code location root") return current_path -def _is_deployment_root(path: Path) -> bool: - return (path / "code_locations").exists() - - -def _is_code_location_root(path: Path) -> bool: +def _is_code_location_root_directory(path: Path) -> bool: if (path / "pyproject.toml").exists(): with
open(path / "pyproject.toml") as f: toml = tomli.loads(f.read()) @@ -61,113 +70,160 @@ def _is_code_location_root(path: Path) -> bool: _DEPLOYMENT_CODE_LOCATIONS_DIR: Final = "code_locations" # Code location -_CODE_LOCATION_CUSTOM_COMPONENTS_DIR: Final = "lib" +_CODE_LOCATION_COMPONENTS_LIB_DIR: Final = "lib" _CODE_LOCATION_COMPONENT_INSTANCES_DIR: Final = "components" -class DeploymentProjectContext: +@dataclass +class DgContext: + config: DgConfig + cache: Optional[DgCache] = None + @classmethod - def from_path(cls, path: Path) -> Self: - return cls(root_path=_resolve_deployment_root_path(path)) + def from_cli_context(cls, cli_context: click.Context) -> Self: + return cls.from_config(config=DgConfig.from_cli_context(cli_context)) - def __init__(self, root_path: Path): - self._root_path = root_path + @classmethod + def from_config(cls, config: DgConfig) -> Self: + cache = None if config.disable_cache else DgCache.from_config(config) + return cls(config=config, cache=cache) - @property - def deployment_root(self) -> Path: - return self._root_path + @classmethod + def default(cls) -> Self: + return cls.from_config(DgConfig.default()) + + +@dataclass +class DeploymentDirectoryContext: + root_path: Path + dg_context: DgContext + + @classmethod + def from_path(cls, path: Path, dg_context: DgContext) -> Self: + return cls(root_path=_resolve_deployment_root_directory(path), dg_context=dg_context) @property def code_location_root_path(self) -> Path: - return self._root_path / _DEPLOYMENT_CODE_LOCATIONS_DIR + return self.root_path / _DEPLOYMENT_CODE_LOCATIONS_DIR def has_code_location(self, name: str) -> bool: - return os.path.exists(os.path.join(self._root_path, "code_locations", name)) + return (self.root_path / "code_locations" / name).is_dir() + + def get_code_location_names(self) -> Iterable[str]: + return [loc.name for loc in sorted((self.root_path / "code_locations").iterdir())] + + +def get_code_location_env_hash(code_location_root_path: Path) -> str: + uv_lock_path = code_location_root_path / "uv.lock" + if not uv_lock_path.exists(): + raise DgError(f"uv.lock file not found in {code_location_root_path}") + local_components_path = ( + code_location_root_path / code_location_root_path.name / _CODE_LOCATION_COMPONENTS_LIB_DIR + ) + if not local_components_path.exists(): + raise DgError(f"Local components directory not found in {code_location_root_path}") + hasher = hashlib.md5() + hash_file_metadata(hasher, uv_lock_path) + hash_directory_metadata(hasher, local_components_path) + return hasher.hexdigest() + + +def make_cache_key(code_location_path: Path, data_type: CachableDataType) -> Tuple[str, str, str]: + path_parts = [str(part) for part in code_location_path.parts if part != "/"] + env_hash = get_code_location_env_hash(code_location_path) + return ("_".join(path_parts), env_hash, data_type) + + +@dataclass +class CodeLocationDirectoryContext: + """Class encapsulating contextual information about a components code location directory. + + Args: + root_path (Path): The absolute path to the root of the code location directory. + name (str): The name of the code location python package. + component_registry (ComponentRegistry): The component registry for the code location. + deployment_context (Optional[DeploymentDirectoryContext]): The deployment context containing + the code location directory. Defaults to None. + dg_context (DgContext): The global application context. 
+ """ + + root_path: Path + name: str + component_registry: "RemoteComponentRegistry" + deployment_context: Optional[DeploymentDirectoryContext] + dg_context: DgContext - def list_code_locations(self) -> Iterable[str]: - return sorted(os.listdir(os.path.join(self._root_path, "code_locations"))) + @classmethod + def from_path(cls, path: Path, dg_context: DgContext) -> Self: + root_path = _resolve_code_location_root_directory(path) + cache = dg_context.cache + if cache: + cache_key = make_cache_key(root_path, "component_registry_data") -class CodeLocationProjectContext: - _components_registry: Mapping[str, RemoteComponentType] = {} + raw_registry_data = cache.get(cache_key) if cache else None + if not raw_registry_data: + raw_registry_data = execute_code_location_command( + root_path, ["list", "component-types"], dg_context + ) + if cache: + cache.set(cache_key, raw_registry_data) - @classmethod - def from_path(cls, path: Path) -> Self: - root_path = _resolve_code_location_root_path(path) - raw_component_registry = execute_code_location_command( - root_path, ["list", "component-types"] - ) - component_registry = RemoteComponentRegistry.from_dict(json.loads(raw_component_registry)) - deployment_context = ( - DeploymentProjectContext.from_path(path) if is_inside_deployment_project(path) else None - ) + registry_data = json.loads(raw_registry_data) + component_registry = RemoteComponentRegistry.from_dict(registry_data) return cls( - deployment_context=deployment_context, root_path=root_path, name=path.name, component_registry=component_registry, + deployment_context=DeploymentDirectoryContext.from_path(path, dg_context) + if is_inside_deployment_directory(path) + else None, + dg_context=dg_context, ) - def __init__( - self, - deployment_context: Optional[DeploymentProjectContext], - root_path: Path, - name: str, - component_registry: "RemoteComponentRegistry", - ): - self._deployment_context = deployment_context - self._root_path = root_path - self._name = name - self._component_registry = component_registry - @property - def name(self) -> str: - return self._name + def config(self) -> DgConfig: + return self.dg_context.config @property - def deployment_context(self) -> Optional[DeploymentProjectContext]: - return self._deployment_context + def local_component_types_root_path(self) -> str: + return os.path.join(self.root_path, self.name, _CODE_LOCATION_COMPONENTS_LIB_DIR) @property - def component_types_root_path(self) -> str: - return os.path.join(self._root_path, self._name, _CODE_LOCATION_CUSTOM_COMPONENTS_DIR) + def local_component_types_root_module_name(self) -> str: + return f"{self.name}.{_CODE_LOCATION_COMPONENTS_LIB_DIR}" - @property - def component_types_root_module(self) -> str: - return f"{self._name}.{_CODE_LOCATION_CUSTOM_COMPONENTS_DIR}" - - @property - def component_registry(self) -> "RemoteComponentRegistry": - return self._component_registry + def iter_component_types(self) -> Iterable[Tuple[str, RemoteComponentType]]: + for key in sorted(self.component_registry.keys()): + yield key, self.component_registry.get(key) def has_component_type(self, name: str) -> bool: - return self._component_registry.has(name) + return self.component_registry.has(name) def get_component_type(self, name: str) -> RemoteComponentType: if not self.has_component_type(name): raise DgError(f"No component type named {name}") - return self._component_registry.get(name) - - def list_component_types(self) -> Sequence[str]: - return sorted(self._component_registry.keys()) - - def 
get_component_instance_path(self, name: str) -> str: - if name not in self.component_instances: - raise DgError(f"No component instance named {name}") - return os.path.join(self.component_instances_root_path, name) + return self.component_registry.get(name) @property - def component_instances_root_path(self) -> str: - return os.path.join(self._root_path, self._name, _CODE_LOCATION_COMPONENT_INSTANCES_DIR) + def component_instances_root_path(self) -> Path: + return self.root_path / self.name / _CODE_LOCATION_COMPONENT_INSTANCES_DIR @property - def component_instances(self) -> Iterable[str]: - return os.listdir( - os.path.join(self._root_path, self._name, _CODE_LOCATION_COMPONENT_INSTANCES_DIR) - ) + def component_instances_root_module_name(self) -> str: + return f"{self.name}.{_CODE_LOCATION_COMPONENT_INSTANCES_DIR}" + + def get_component_instance_names(self) -> Iterable[str]: + return [ + str(instance_path.name) + for instance_path in self.component_instances_root_path.iterdir() + ] + + def get_component_instance_path(self, name: str) -> Path: + if not self.has_component_instance(name): + raise DgError(f"No component instance named {name}") + return self.component_instances_root_path / name def has_component_instance(self, name: str) -> bool: - return os.path.exists( - os.path.join(self._root_path, self._name, _CODE_LOCATION_COMPONENT_INSTANCES_DIR, name) - ) + return (self.component_instances_root_path / name).is_dir() diff --git a/python_modules/libraries/dagster-dg/dagster_dg/error.py b/python_modules/libraries/dagster-dg/dagster_dg/error.py index a6cdf04a81995..4909e68682427 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/error.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/error.py @@ -1,2 +1,4 @@ class DgError(Exception): + """Base class for errors thrown by the dg CLI.""" + pass diff --git a/python_modules/libraries/dagster-dg/dagster_dg/generate.py b/python_modules/libraries/dagster-dg/dagster_dg/generate.py index 1ca5c72bad6be..46a198f56f7a7 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/generate.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/generate.py @@ -1,19 +1,26 @@ import os +import subprocess import textwrap from pathlib import Path from typing import Optional, Tuple import click +from dagster_dg.context import CodeLocationDirectoryContext, DgContext from dagster_dg.utils import ( camelcase, - discover_git_root, execute_code_location_command, generate_subtree, + get_uv_command_env, + pushd, ) +# ######################## +# ##### DEPLOYMENT +# ######################## -def generate_deployment(path: str) -> None: + +def generate_deployment(path: Path) -> None: click.echo(f"Creating a Dagster deployment at {path}.") generate_subtree( @@ -25,32 +32,72 @@ def generate_deployment(path: str) -> None: ) -def generate_code_location(path: str, editable_dagster_root: Optional[str] = None) -> None: - click.echo(f"Creating a Dagster code location at {path}.") +# ######################## +# ##### CODE LOCATION +# ######################## - # Temporarily we always set an editable dagster root. This is needed while the packages are not - # published. - editable_dagster_root = ( - editable_dagster_root - or os.environ.get("DAGSTER_GIT_REPO_DIR") - or discover_git_root(Path(__file__)) +# Despite the fact that editable dependencies are resolved through tool.uv.sources, we need to set +# the dependencies themselves differently depending on whether we are using editable dagster or +# not. 
This is because `tool.uv.sources` only seems to apply to direct dependencies of the package, +# so any 2+-order Dagster dependency of our package needs to be listed as a direct dependency in the +# editable case. +EDITABLE_DAGSTER_DEPENDENCIES = ( + "dagster", + "dagster-pipes", + "dagster-components", +) +EDITABLE_DAGSTER_DEV_DEPENDENCIES = ("dagster-webserver", "dagster-graphql") +PYPI_DAGSTER_DEPENDENCIES = ("dagster-components",) +PYPI_DAGSTER_DEV_DEPENDENCIES = ("dagster-webserver",) + + +def get_pyproject_toml_dependencies(use_editable_dagster: bool) -> str: + deps = EDITABLE_DAGSTER_DEPENDENCIES if use_editable_dagster else PYPI_DAGSTER_DEPENDENCIES + return "\n".join( + [ + "dependencies = [", + *[f' "{dep}",' for dep in deps], + "]", + ] + ) + + +def get_pyproject_toml_dev_dependencies(use_editable_dagster: bool) -> str: + deps = ( + EDITABLE_DAGSTER_DEV_DEPENDENCIES if use_editable_dagster else PYPI_DAGSTER_DEV_DEPENDENCIES + ) + return "\n".join( + [ + "dev = [", + *[f' "{dep}",' for dep in deps], + "]", + ] ) - editable_dagster_uv_sources = textwrap.dedent(f""" - [tool.uv.sources] - dagster = {{ path = "{editable_dagster_root}/python_modules/dagster", editable = true }} - dagster-graphql = {{ path = "{editable_dagster_root}/python_modules/dagster-graphql", editable = true }} - dagster-pipes = {{ path = "{editable_dagster_root}/python_modules/dagster-pipes", editable = true }} - dagster-webserver = {{ path = "{editable_dagster_root}/python_modules/dagster-webserver", editable = true }} - dagster-components = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-components", editable = true }} - dagster-embedded-elt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-embedded-elt", editable = true }} - dagster-dbt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-dbt", editable = true }} + +def get_pyproject_toml_uv_sources(editable_dagster_root: str) -> str: + return textwrap.dedent(f""" + [tool.uv.sources] + dagster = {{ path = "{editable_dagster_root}/python_modules/dagster", editable = true }} + dagster-graphql = {{ path = "{editable_dagster_root}/python_modules/dagster-graphql", editable = true }} + dagster-pipes = {{ path = "{editable_dagster_root}/python_modules/dagster-pipes", editable = true }} + dagster-webserver = {{ path = "{editable_dagster_root}/python_modules/dagster-webserver", editable = true }} + dagster-components = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-components", editable = true }} + dagster-embedded-elt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-embedded-elt", editable = true }} + dagster-dbt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-dbt", editable = true }} """) - if editable_dagster_root: - uv_sources = editable_dagster_uv_sources - else: - uv_sources = editable_dagster_uv_sources + +def generate_code_location(path: Path, editable_dagster_root: Optional[str] = None) -> None: + click.echo(f"Creating a Dagster code location at {path}.") + + dependencies = get_pyproject_toml_dependencies(use_editable_dagster=bool(editable_dagster_root)) + dev_dependencies = get_pyproject_toml_dev_dependencies( + use_editable_dagster=bool(editable_dagster_root) + ) + uv_sources = ( + get_pyproject_toml_uv_sources(editable_dagster_root) if editable_dagster_root else "" + ) generate_subtree( path=path, @@ -58,14 +105,23 @@ def generate_code_location(path: str, editable_dagster_root: Optional[str] = Non templates_path=os.path.join( 
os.path.dirname(__file__), "templates", "CODE_LOCATION_NAME_PLACEHOLDER" ), + dependencies=dependencies, + dev_dependencies=dev_dependencies, uv_sources=uv_sources, ) # Build the venv - execute_code_location_command(Path(path), ("uv", "sync")) + with pushd(path): + subprocess.run(["uv", "sync"], check=True, env=get_uv_command_env()) -def generate_component_type(root_path: str, name: str) -> None: +# ######################## +# ##### COMPONENT TYPE +# ######################## + + +def generate_component_type(context: CodeLocationDirectoryContext, name: str) -> None: + root_path = Path(context.local_component_types_root_path) click.echo(f"Creating a Dagster component type at {root_path}/{name}.py.") generate_subtree( @@ -77,33 +133,38 @@ def generate_component_type(root_path: str, name: str) -> None: component_type=name, ) + with open(root_path / "__init__.py", "a") as f: + f.write( + f"from {context.local_component_types_root_module_name}.{name} import {camelcase(name)}\n" + ) + + +# ######################## +# ##### COMPONENT INSTANCE +# ######################## + def generate_component_instance( - root_path: str, + root_path: Path, name: str, component_type: str, json_params: Optional[str], extra_args: Tuple[str, ...], + dg_context: "DgContext", ) -> None: - click.echo(f"Creating a Dagster component instance at {root_path}/{name}.py.") - - component_instance_root_path = os.path.join(root_path, name) - generate_subtree( - path=component_instance_root_path, - name_placeholder="COMPONENT_INSTANCE_NAME_PLACEHOLDER", - templates_path=os.path.join( - os.path.dirname(__file__), "templates", "COMPONENT_INSTANCE_NAME_PLACEHOLDER" - ), - project_name=name, - component_type=component_type, - ) - + component_instance_root_path = root_path / name + click.echo(f"Creating a Dagster component instance folder at {component_instance_root_path}.") + os.makedirs(component_instance_root_path, exist_ok=True) code_location_command = ( "generate", "component", component_type, name, - *([f"--json-params={json_params}"] if json_params else []), + *(["--json-params", json_params] if json_params else []), *(["--", *extra_args] if extra_args else []), ) - execute_code_location_command(Path(component_instance_root_path), code_location_command) + execute_code_location_command( + Path(component_instance_root_path), + code_location_command, + dg_context, + ) diff --git a/python_modules/libraries/dagster-dg/dagster_dg/templates/CODE_LOCATION_NAME_PLACEHOLDER/pyproject.toml.jinja b/python_modules/libraries/dagster-dg/dagster_dg/templates/CODE_LOCATION_NAME_PLACEHOLDER/pyproject.toml.jinja index 688e8e80ab91d..7ff9af903784a 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/templates/CODE_LOCATION_NAME_PLACEHOLDER/pyproject.toml.jinja +++ b/python_modules/libraries/dagster-dg/dagster_dg/templates/CODE_LOCATION_NAME_PLACEHOLDER/pyproject.toml.jinja @@ -2,25 +2,14 @@ name = "{{ project_name }}" requires-python = ">=3.9,<3.13" version = "0.1.0" -dependencies = [ - "dagster", - "dagster-graphql", - "dagster-pipes", - "dagster-webserver", - "dagster-components[sling,dbt]", - "dagster-embedded-elt", - "dagster-dbt", - "sling-mac-arm64", -] - -[project.optional-dependencies] -dev = [ - "dagster-webserver", -] +{{ dependencies }} [project.entry-points] "dagster.components" = { {{ project_name }} = "{{ project_name }}.lib"} +[dependency-groups] +{{ dev_dependencies }} + [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" diff --git a/python_modules/libraries/dagster-dg/dagster_dg/utils.py 
b/python_modules/libraries/dagster-dg/dagster_dg/utils.py index 2452d563fee9c..57be0cc34e53f 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/utils.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/utils.py @@ -3,32 +3,62 @@ import posixpath import re import subprocess +import sys +from fnmatch import fnmatch from pathlib import Path -from typing import Any, Final, Iterator, List, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Final, Iterator, List, Mapping, Optional, Sequence, Union import click import jinja2 +from typing_extensions import TypeAlias from dagster_dg.version import __version__ as dagster_version +# There is some weirdness concerning the availability of hashlib.HASH between different Python +# versions, so for now we avoid trying to import it and just alias the type to Any. +Hash: TypeAlias = Any + +if TYPE_CHECKING: + from dagster_dg.context import DgContext + +CLI_CONFIG_KEY = "config" + + _CODE_LOCATION_COMMAND_PREFIX: Final = ["uv", "run", "dagster-components"] -def discover_git_root(path: Path) -> str: +def execute_code_location_command(path: Path, cmd: Sequence[str], dg_context: "DgContext") -> str: + full_cmd = [ + *_CODE_LOCATION_COMMAND_PREFIX, + *( + ["--builtin-component-lib", dg_context.config.builtin_component_lib] + if dg_context.config.builtin_component_lib + else [] + ), + *cmd, + ] + with pushd(path): + result = subprocess.run( + full_cmd, stdout=subprocess.PIPE, env=get_uv_command_env(), check=True + ) + return result.stdout.decode("utf-8") + + +# uv commands should be executed in an environment with no pre-existing VIRTUAL_ENV set. If this +# variable is set (common during development) and does not match the venv resolved by uv, it prints +# undesirable warnings. +def get_uv_command_env() -> Mapping[str, str]: + return {k: v for k, v in os.environ.items() if not k == "VIRTUAL_ENV"} + + +def discover_git_root(path: Path) -> Path: while path != path.parent: if (path / ".git").exists(): - return str(path) + return path path = path.parent raise ValueError("Could not find git root") -def execute_code_location_command(path: Path, cmd: Sequence[str]) -> str: - with pushd(path): - full_cmd = [*_CODE_LOCATION_COMMAND_PREFIX, *cmd] - result = subprocess.run(full_cmd, stdout=subprocess.PIPE, check=False) - return result.stdout.decode("utf-8") - - @contextlib.contextmanager def pushd(path: Union[str, Path]) -> Iterator[None]: old_cwd = os.getcwd() @@ -39,7 +69,6 @@ def pushd(path: Union[str, Path]) -> Iterator[None]: os.chdir(old_cwd) -# Adapted from https://github.com/okunishinishi/python-stringcase/blob/master/stringcase.py def camelcase(string: str) -> str: string = re.sub(r"^[\-_\.]", "", str(string)) if not string: @@ -57,10 +86,11 @@ def snakecase(string: str) -> str: return string -DEFAULT_EXCLUDES: List[str] = [ +_DEFAULT_EXCLUDES: List[str] = [ "__pycache__", ".pytest_cache", "*.egg-info", + "*.cpython-*", ".DS_Store", ".ruff_cache", "tox.ini", @@ -72,7 +102,7 @@ def snakecase(string: str) -> str: # Copied from dagster._generate.generate def generate_subtree( - path: str, + path: Path, excludes: Optional[List[str]] = None, name_placeholder: str = PROJECT_NAME_PLACEHOLDER, templates_path: str = PROJECT_NAME_PLACEHOLDER, @@ -80,36 +110,32 @@ **other_template_vars: Any, ): """Renders templates for Dagster project.""" - excludes = DEFAULT_EXCLUDES if not excludes else DEFAULT_EXCLUDES + excludes + excludes = _DEFAULT_EXCLUDES if not excludes else _DEFAULT_EXCLUDES + excludes normalized_path =
os.path.normpath(path) project_name = project_name or os.path.basename(normalized_path).replace("-", "_") if not os.path.exists(normalized_path): os.mkdir(normalized_path) - project_template_path: str = os.path.join( - os.path.dirname(__file__), "templates", templates_path - ) - loader: jinja2.loaders.FileSystemLoader = jinja2.FileSystemLoader( - searchpath=project_template_path - ) - env: jinja2.environment.Environment = jinja2.Environment(loader=loader) + project_template_path = os.path.join(os.path.dirname(__file__), "templates", templates_path) + loader = jinja2.FileSystemLoader(searchpath=project_template_path) + env = jinja2.Environment(loader=loader) # merge custom skip_files with the default list for root, dirs, files in os.walk(project_template_path): # For each subdirectory in the source template, create a subdirectory in the destination. for dirname in dirs: - src_dir_path: str = os.path.join(root, dirname) + src_dir_path = os.path.join(root, dirname) if _should_skip_file(src_dir_path, excludes): continue - src_relative_dir_path: str = os.path.relpath(src_dir_path, project_template_path) - dst_relative_dir_path: str = src_relative_dir_path.replace( + src_relative_dir_path = os.path.relpath(src_dir_path, project_template_path) + dst_relative_dir_path = src_relative_dir_path.replace( name_placeholder, project_name, 1, ) - dst_dir_path: str = os.path.join(normalized_path, dst_relative_dir_path) + dst_dir_path = os.path.join(normalized_path, dst_relative_dir_path) os.mkdir(dst_dir_path) @@ -119,20 +145,20 @@ def generate_subtree( if _should_skip_file(src_file_path, excludes): continue - src_relative_file_path: str = os.path.relpath(src_file_path, project_template_path) - dst_relative_file_path: str = src_relative_file_path.replace( + src_relative_file_path = os.path.relpath(src_file_path, project_template_path) + dst_relative_file_path = src_relative_file_path.replace( name_placeholder, project_name, 1, ) - dst_file_path: str = os.path.join(normalized_path, dst_relative_file_path) + dst_file_path = os.path.join(normalized_path, dst_relative_file_path) if dst_file_path.endswith(".jinja"): dst_file_path = dst_file_path[: -len(".jinja")] with open(dst_file_path, "w", encoding="utf8") as f: # Jinja template names must use the POSIX path separator "/". - template_name: str = src_relative_file_path.replace(os.sep, posixpath.sep) + template_name = src_relative_file_path.replace(os.sep, posixpath.sep) template: jinja2.environment.Template = env.get_template(name=template_name) f.write( template.render( @@ -148,7 +174,7 @@ def generate_subtree( click.echo(f"Generated files for Dagster project in {path}.") -def _should_skip_file(path: str, excludes: List[str] = DEFAULT_EXCLUDES): +def _should_skip_file(path: str, excludes: List[str] = _DEFAULT_EXCLUDES): """Given a file path `path` in a source template, returns whether or not the file should be skipped when generating destination files. @@ -159,3 +185,29 @@ def _should_skip_file(path: str, excludes: List[str] = DEFAULT_EXCLUDES): return True return False + + +def ensure_dagster_dg_tests_import() -> None: + from dagster_dg import __file__ as dagster_dg_init_py + + dagster_dg_package_root = (Path(dagster_dg_init_py) / ".." 
/ "..").resolve() + assert ( + dagster_dg_package_root / "dagster_dg_tests" + ).exists(), "Could not find dagster_dg_tests where expected" + sys.path.append(dagster_dg_package_root.as_posix()) + + +def hash_directory_metadata(hasher: Hash, path: Union[str, Path]) -> None: + for root, dirs, files in os.walk(path): + for name in dirs + files: + if any(fnmatch(name, pattern) for pattern in _DEFAULT_EXCLUDES): + continue + filepath = os.path.join(root, name) + hash_file_metadata(hasher, filepath) + + +def hash_file_metadata(hasher: Hash, path: Union[str, Path]) -> None: + stat = os.stat(path=path) + hasher.update(str(path).encode()) + hasher.update(str(stat.st_mtime).encode()) # Last modified time + hasher.update(str(stat.st_size).encode()) # File size diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py index 776d33f996bff..7db275b8b0b92 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py @@ -1,118 +1,27 @@ -import importlib -import inspect import json import os -import sys -import textwrap -from contextlib import contextmanager +import subprocess from pathlib import Path -from typing import Iterator import pytest import tomli -from click.testing import CliRunner -from dagster_dg.cli.generate import ( - generate_code_location_command, - generate_component_command, - generate_component_type_command, - generate_deployment_command, -) -from dagster_dg.context import CodeLocationProjectContext -from dagster_dg.utils import pushd - - -def _ensure_cwd_on_sys_path(): - if sys.path[0] != "": - sys.path.insert(0, "") - - -def _assert_module_imports(module_name: str): - _ensure_cwd_on_sys_path() - assert importlib.import_module(module_name) - - -# This is a holder for code that is intended to be written to a file -def _example_component_type_baz(): - from typing import Any - - from dagster import AssetExecutionContext, Definitions, PipesSubprocessClient, asset - from dagster_components import Component, ComponentLoadContext, component +from dagster_dg.context import CodeLocationDirectoryContext, DgContext +from dagster_dg.utils import discover_git_root, ensure_dagster_dg_tests_import - _SAMPLE_PIPES_SCRIPT = """ - from dagster_pipes import open_dagster_pipes +ensure_dagster_dg_tests_import() - context = open_dagster_pipes() - context.report_asset_materialization({"alpha": "beta"}) - """ - - @component(name="baz") - class Baz(Component): - @classmethod - def generate_files(cls, params: Any): - with open("sample.py", "w") as f: - f.write(_SAMPLE_PIPES_SCRIPT) - - def build_defs(self, context: ComponentLoadContext) -> Definitions: - @asset - def foo(context: AssetExecutionContext, client: PipesSubprocessClient): - client.run(context=context, command=["python", "sample.py"]) - - return Definitions(assets=[foo], resources={"client": PipesSubprocessClient()}) - - -@contextmanager -def isolated_example_deployment_foo(runner: CliRunner) -> Iterator[None]: - with runner.isolated_filesystem(): - runner.invoke(generate_deployment_command, ["foo"]) - with pushd("foo"): - yield - - -@contextmanager -def isolated_example_code_location_bar( - runner: CliRunner, in_deployment: bool = True -) -> Iterator[None]: - if in_deployment: - with isolated_example_deployment_foo(runner), clean_module_cache("bar"): - 
runner.invoke(generate_code_location_command, ["bar"]) - with pushd("code_locations/bar"): - yield - else: - with runner.isolated_filesystem(), clean_module_cache("bar"): - runner.invoke(generate_code_location_command, ["bar"]) - with pushd("bar"): - yield - - -@contextmanager -def isolated_example_code_location_bar_with_component_type_baz( - runner: CliRunner, in_deployment: bool = True -) -> Iterator[None]: - with isolated_example_code_location_bar(runner, in_deployment): - with open("bar/lib/baz.py", "w") as f: - component_type_source = textwrap.dedent( - inspect.getsource(_example_component_type_baz).split("\n", 1)[1] - ) - f.write(component_type_source) - yield - - -@contextmanager -def clean_module_cache(module_name: str): - prefix = f"{module_name}." - keys_to_del = { - key for key in sys.modules.keys() if key == module_name or key.startswith(prefix) - } - for key in keys_to_del: - del sys.modules[key] - yield +from dagster_dg_tests.utils import ( + ProxyRunner, + assert_runner_result, + isolated_example_code_location_bar, + isolated_example_deployment_foo, +) def test_generate_deployment_command_success() -> None: - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(generate_deployment_command, ["foo"]) - assert result.exit_code == 0 + with ProxyRunner.test() as runner, runner.isolated_filesystem(): + result = runner.invoke("generate", "deployment", "foo") + assert_runner_result(result) assert Path("foo").exists() assert Path("foo/.github").exists() assert Path("foo/.github/workflows").exists() @@ -122,19 +31,17 @@ def test_generate_deployment_command_success() -> None: def test_generate_deployment_command_already_exists_fails() -> None: - runner = CliRunner() - with runner.isolated_filesystem(): + with ProxyRunner.test() as runner, runner.isolated_filesystem(): os.mkdir("foo") - result = runner.invoke(generate_deployment_command, ["foo"]) - assert result.exit_code != 0 + result = runner.invoke("generate", "deployment", "foo") + assert_runner_result(result, exit_0=False) assert "already exists" in result.output def test_generate_code_location_inside_deployment_success() -> None: - runner = CliRunner() - with isolated_example_deployment_foo(runner): - result = runner.invoke(generate_code_location_command, ["bar"]) - assert result.exit_code == 0 + with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner): + result = runner.invoke("generate", "code-location", "bar") + assert_runner_result(result) assert Path("code_locations/bar").exists() assert Path("code_locations/bar/bar").exists() assert Path("code_locations/bar/bar/lib").exists() @@ -146,19 +53,17 @@ def test_generate_code_location_inside_deployment_success() -> None: assert Path("code_locations/bar/.venv").exists() assert Path("code_locations/bar/uv.lock").exists() - # Commented out because we are always adding sources right now - # with open("code_locations/bar/pyproject.toml") as f: - # toml = tomli.loads(f.read()) - # - # # No tool.uv.sources added without --use-editable-dagster - # assert "uv" not in toml["tool"] + with open("code_locations/bar/pyproject.toml") as f: + toml = tomli.loads(f.read()) + + # No tool.uv.sources added without --use-editable-dagster + assert "uv" not in toml["tool"] def test_generate_code_location_outside_deployment_success() -> None: - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(generate_code_location_command, ["bar"]) - assert result.exit_code == 0 + with ProxyRunner.test() as runner, 
runner.isolated_filesystem(): + result = runner.invoke("generate", "code-location", "bar") + assert_runner_result(result) assert Path("bar").exists() assert Path("bar/bar").exists() assert Path("bar/bar/lib").exists() @@ -171,22 +76,17 @@ def test_generate_code_location_outside_deployment_success() -> None: assert Path("bar/uv.lock").exists() -def _find_git_root(): - current = Path.cwd() - while current != current.parent: - if (current / ".git").exists(): - return current - current = current.parent - raise Exception("Could not find git root") - - -def test_generate_code_location_editable_dagster_success(monkeypatch) -> None: - runner = CliRunner() - dagster_git_repo_dir = _find_git_root() - monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", dagster_git_repo_dir) - with isolated_example_deployment_foo(runner): - result = runner.invoke(generate_code_location_command, ["--use-editable-dagster", "bar"]) - assert result.exit_code == 0 +@pytest.mark.parametrize("mode", ["env_var", "arg"]) +def test_generate_code_location_editable_dagster_success(mode: str, monkeypatch) -> None: + dagster_git_repo_dir = discover_git_root(Path(__file__)) + if mode == "env_var": + monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", str(dagster_git_repo_dir)) + editable_args = ["--use-editable-dagster", "--"] + else: + editable_args = ["--use-editable-dagster", str(dagster_git_repo_dir)] + with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner): + result = runner.invoke("generate", "code-location", *editable_args, "bar") + assert_runner_result(result) assert Path("code_locations/bar").exists() assert Path("code_locations/bar/pyproject.toml").exists() with open("code_locations/bar/pyproject.toml") as f: @@ -209,85 +109,176 @@ def test_generate_code_location_editable_dagster_success(monkeypatch) -> None: } +def test_generate_code_location_editable_dagster_no_env_var_no_value_fails(monkeypatch) -> None: + monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", "") + with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner): + result = runner.invoke("generate", "code-location", "--use-editable-dagster", "--", "bar") + assert_runner_result(result, exit_0=False) + assert "requires the `DAGSTER_GIT_REPO_DIR`" in result.output + + def test_generate_code_location_already_exists_fails() -> None: - runner = CliRunner() - with isolated_example_deployment_foo(runner): - result = runner.invoke(generate_code_location_command, ["bar"]) - assert result.exit_code == 0 - result = runner.invoke(generate_code_location_command, ["bar"]) - assert result.exit_code != 0 + with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner): + result = runner.invoke("generate", "code-location", "bar") + assert_runner_result(result) + result = runner.invoke("generate", "code-location", "bar") + assert_runner_result(result, exit_0=False) assert "already exists" in result.output @pytest.mark.parametrize("in_deployment", [True, False]) def test_generate_component_type_success(in_deployment: bool) -> None: - runner = CliRunner() - with isolated_example_code_location_bar(runner, in_deployment): - result = runner.invoke(generate_component_type_command, ["baz"]) - assert result.exit_code == 0 + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner, in_deployment): + result = runner.invoke("generate", "component-type", "baz") + assert_runner_result(result) assert Path("bar/lib/baz.py").exists() - context = CodeLocationProjectContext.from_path(Path.cwd()) + context = CodeLocationDirectoryContext.from_path(Path.cwd(), 
DgContext.default()) assert context.has_component_type("bar.baz") def test_generate_component_type_outside_code_location_fails() -> None: - runner = CliRunner() - with isolated_example_deployment_foo(runner): - result = runner.invoke(generate_component_type_command, ["baz"]) - assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner): + result = runner.invoke("generate", "component-type", "baz") + assert_runner_result(result, exit_0=False) + assert "must be run inside a Dagster code location directory" in result.output @pytest.mark.parametrize("in_deployment", [True, False]) def test_generate_component_type_already_exists_fails(in_deployment: bool) -> None: - runner = CliRunner() - with isolated_example_code_location_bar(runner, in_deployment): - result = runner.invoke(generate_component_type_command, ["baz"]) - assert result.exit_code == 0 - result = runner.invoke(generate_component_type_command, ["baz"]) - assert result.exit_code != 0 + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner, in_deployment): + result = runner.invoke("generate", "component-type", "baz") + assert_runner_result(result) + result = runner.invoke("generate", "component-type", "baz") + assert_runner_result(result, exit_0=False) assert "already exists" in result.output @pytest.mark.parametrize("in_deployment", [True, False]) -def test_generate_component_success(in_deployment: bool) -> None: - runner = CliRunner() - with isolated_example_code_location_bar_with_component_type_baz(runner, in_deployment): - result = runner.invoke(generate_component_command, ["bar.baz", "qux"]) - assert result.exit_code == 0 +def test_generate_component_no_params_success(in_deployment: bool) -> None: + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner, in_deployment): + result = runner.invoke( + "generate", + "component", + "dagster_components.test.all_metadata_empty_asset", + "qux", + ) + assert_runner_result(result) assert Path("bar/components/qux").exists() - assert Path("bar/components/qux/sample.py").exists() component_yaml_path = Path("bar/components/qux/component.yaml") assert component_yaml_path.exists() - assert "type: bar.baz" in component_yaml_path.read_text() + assert ( + "type: dagster_components.test.all_metadata_empty_asset" + in component_yaml_path.read_text() + ) + + +@pytest.mark.parametrize("in_deployment", [True, False]) +def test_generate_component_json_params_success(in_deployment: bool) -> None: + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner, in_deployment): + result = runner.invoke( + "generate", + "component", + "dagster_components.test.simple_pipes_script_asset", + "qux", + "--json-params", + '{"asset_key": "foo", "filename": "hello.py"}', + ) + assert_runner_result(result) + assert Path("bar/components/qux").exists() + assert Path("bar/components/qux/hello.py").exists() + component_yaml_path = Path("bar/components/qux/component.yaml") + assert component_yaml_path.exists() + assert ( + "type: dagster_components.test.simple_pipes_script_asset" + in component_yaml_path.read_text() + ) + + +@pytest.mark.parametrize("in_deployment", [True, False]) +def test_generate_component_extra_args_success(in_deployment: bool) -> None: + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner, in_deployment): + result = runner.invoke( + "generate", + "component", + 
"dagster_components.test.simple_pipes_script_asset", + "qux", + "--", + "--asset-key=foo", + "--filename=hello.py", + ) + assert_runner_result(result) + assert Path("bar/components/qux").exists() + assert Path("bar/components/qux/hello.py").exists() + component_yaml_path = Path("bar/components/qux/component.yaml") + assert component_yaml_path.exists() + assert ( + "type: dagster_components.test.simple_pipes_script_asset" + in component_yaml_path.read_text() + ) + + +def test_generate_component_json_params_and_extra_args_fails() -> None: + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner): + result = runner.invoke( + "generate", + "component", + "dagster_components.test.simple_pipes_script_asset", + "qux", + "--json-params", + '{"filename": "hello.py"}', + "--", + "--filename=hello.py", + ) + assert_runner_result(result, exit_0=False) + assert "Detected both --json-params and EXTRA_ARGS" in result.output def test_generate_component_outside_code_location_fails() -> None: - runner = CliRunner() - with isolated_example_deployment_foo(runner): - result = runner.invoke(generate_component_command, ["bar.baz", "qux"]) - assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner): + result = runner.invoke("generate", "component", "bar.baz", "qux") + assert_runner_result(result, exit_0=False) + assert "must be run inside a Dagster code location directory" in result.output @pytest.mark.parametrize("in_deployment", [True, False]) def test_generate_component_already_exists_fails(in_deployment: bool) -> None: - runner = CliRunner() - with isolated_example_code_location_bar_with_component_type_baz(runner, in_deployment): - result = runner.invoke(generate_component_command, ["bar.baz", "qux"]) - assert result.exit_code == 0 - result = runner.invoke(generate_component_command, ["bar.baz", "qux"]) - assert result.exit_code != 0 + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner, in_deployment): + result = runner.invoke( + "generate", + "component", + "dagster_components.test.all_metadata_empty_asset", + "qux", + ) + assert_runner_result(result) + result = runner.invoke( + "generate", + "component", + "dagster_components.test.all_metadata_empty_asset", + "qux", + ) + assert_runner_result(result, exit_0=False) assert "already exists" in result.output +# ######################## +# ##### REAL COMPONENTS +# ######################## + + def test_generate_sling_replication_instance() -> None: - runner = CliRunner() - with isolated_example_code_location_bar(runner): + with ( + ProxyRunner.test(use_test_component_lib=False) as runner, + isolated_example_code_location_bar(runner), + ): + # We need to add dagster-embedded-elt also because we are using editable installs. Only + # direct dependencies will be resolved by uv.tool.sources. 
+ subprocess.run( + ["uv", "add", "dagster-components[sling]", "dagster-embedded-elt"], check=True + ) result = runner.invoke( - generate_component_command, ["dagster_components.sling_replication", "file_ingest"] + "generate", "component", "dagster_components.sling_replication", "file_ingest" ) - assert result.exit_code == 0 + assert_runner_result(result) assert Path("bar/components/file_ingest").exists() component_yaml_path = Path("bar/components/file_ingest/component.yaml") @@ -310,12 +301,21 @@ def test_generate_sling_replication_instance() -> None: ], ) def test_generate_dbt_project_instance(params) -> None: - runner = CliRunner() - with isolated_example_code_location_bar(runner): + with ( + ProxyRunner.test(use_test_component_lib=False) as runner, + isolated_example_code_location_bar(runner), + ): + # We need to add dagster-dbt also because we are using editable installs. Only + # direct dependencies will be resolved by uv.tool.sources. + subprocess.run(["uv", "add", "dagster-components[dbt]", "dagster-dbt"], check=True) result = runner.invoke( - generate_component_command, ["dagster_components.dbt_project", "my_project", *params] + "generate", + "component", + "dagster_components.dbt_project", + "my_project", + *params, ) - assert result.exit_code == 0 + assert_runner_result(result) assert Path("bar/components/my_project").exists() component_yaml_path = Path("bar/components/my_project/component.yaml") diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_info_commands.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_info_commands.py new file mode 100644 index 0000000000000..27d9005156ccb --- /dev/null +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_info_commands.py @@ -0,0 +1,200 @@ +import textwrap + +from dagster_dg.utils import ensure_dagster_dg_tests_import + +ensure_dagster_dg_tests_import() + +from dagster_dg_tests.utils import ( + ProxyRunner, + assert_runner_result, + isolated_example_code_location_bar, +) + + +def test_info_component_type_all_metadata_success(): + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner): + result = runner.invoke( + "info", + "component-type", + "dagster_components.test.simple_pipes_script_asset", + ) + assert_runner_result(result) + assert ( + result.output.strip() + == textwrap.dedent(""" + dagster_components.test.simple_pipes_script_asset + + Description: + + A simple asset that runs a Python script with the Pipes subprocess client. + + Because it is a pipes asset, no value is returned. 
+ + Generate params schema: + + { + "properties": { + "asset_key": { + "title": "Asset Key", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "asset_key", + "filename" + ], + "title": "SimplePipesScriptAssetParams", + "type": "object" + } + + Component params schema: + + { + "properties": { + "asset_key": { + "title": "Asset Key", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "asset_key", + "filename" + ], + "title": "SimplePipesScriptAssetParams", + "type": "object" + } + """).strip() + ) + + +def test_info_component_type_all_metadata_empty_success(): + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner): + result = runner.invoke( + "info", + "component-type", + "dagster_components.test.all_metadata_empty_asset", + ) + assert_runner_result(result) + assert ( + result.output.strip() + == textwrap.dedent(""" + dagster_components.test.all_metadata_empty_asset + """).strip() + ) + + +def test_info_component_type_flag_fields_success(): + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner): + result = runner.invoke( + "info", + "component-type", + "dagster_components.test.simple_pipes_script_asset", + "--description", + ) + assert_runner_result(result) + assert ( + result.output.strip() + == textwrap.dedent(""" + A simple asset that runs a Python script with the Pipes subprocess client. + + Because it is a pipes asset, no value is returned. + """).strip() + ) + + result = runner.invoke( + "info", + "component-type", + "dagster_components.test.simple_pipes_script_asset", + "--generate-params-schema", + ) + assert_runner_result(result) + assert ( + result.output.strip() + == textwrap.dedent(""" + { + "properties": { + "asset_key": { + "title": "Asset Key", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "asset_key", + "filename" + ], + "title": "SimplePipesScriptAssetParams", + "type": "object" + } + """).strip() + ) + + result = runner.invoke( + "info", + "component-type", + "dagster_components.test.simple_pipes_script_asset", + "--component-params-schema", + ) + assert_runner_result(result) + assert ( + result.output.strip() + == textwrap.dedent(""" + { + "properties": { + "asset_key": { + "title": "Asset Key", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "asset_key", + "filename" + ], + "title": "SimplePipesScriptAssetParams", + "type": "object" + } + """).strip() + ) + + +def test_info_component_type_outside_code_location_fails() -> None: + with ProxyRunner.test() as runner, runner.isolated_filesystem(): + result = runner.invoke( + "info", + "component-type", + "dagster_components.test.simple_pipes_script_asset", + "--component-params-schema", + ) + assert_runner_result(result, exit_0=False) + assert "must be run inside a Dagster code location directory" in result.output + + +def test_info_component_type_multiple_flags_fails() -> None: + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner): + result = runner.invoke( + "info", + "component-type", + "dagster_components.test.simple_pipes_script_asset", + "--description", + "--generate-params-schema", + ) + assert_runner_result(result, exit_0=False) + assert ( + "Only one of --description, --generate-params-schema, and --component-params-schema can be specified." 
+ in result.output + ) diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py index 630d2ae2fa95b..abb22d8934b93 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py @@ -1,90 +1,83 @@ -import sys -from pathlib import Path - -from click.testing import CliRunner -from dagster_dg import __file__ as dagster_dg_init_py -from dagster_dg.cli.generate import generate_code_location_command, generate_component_command -from dagster_dg.cli.list import ( - list_code_locations_command, - list_component_types_command, - list_components_command, -) - - -def ensure_dagster_dg_tests_import() -> None: - dagster_dg_package_root = (Path(dagster_dg_init_py) / ".." / "..").resolve() - assert ( - dagster_dg_package_root / "dagster_dg_tests" - ).exists(), "Could not find dagster_dg_tests where expected" - sys.path.append(dagster_dg_package_root.as_posix()) +import textwrap +from dagster_dg.utils import ensure_dagster_dg_tests_import ensure_dagster_dg_tests_import() -from dagster_dg_tests.cli_tests.test_generate_commands import ( +from dagster_dg_tests.utils import ( + ProxyRunner, + assert_runner_result, isolated_example_code_location_bar, - isolated_example_code_location_bar_with_component_type_baz, isolated_example_deployment_foo, ) def test_list_code_locations_success(): - runner = CliRunner() - with isolated_example_deployment_foo(runner): - runner.invoke(generate_code_location_command, ["foo"]) - runner.invoke(generate_code_location_command, ["bar"]) - result = runner.invoke(list_code_locations_command) - assert result.exit_code == 0 - assert result.output == "bar\nfoo\n" + with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner): + runner.invoke("generate", "code-location", "foo") + runner.invoke("generate", "code-location", "bar") + result = runner.invoke("list", "code-locations") + assert_runner_result(result) + assert ( + result.output.strip() + == textwrap.dedent(""" + bar + foo + """).strip() + ) def test_list_code_locations_outside_deployment_fails() -> None: - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(list_code_locations_command) - assert result.exit_code != 0 - assert "must be run inside a Dagster deployment project" in result.output + with ProxyRunner.test() as runner, runner.isolated_filesystem(): + result = runner.invoke("list", "code-locations") + assert_runner_result(result, exit_0=False) + assert "must be run inside a Dagster deployment directory" in result.output def test_list_component_types_success(): - runner = CliRunner() - with isolated_example_code_location_bar(runner): - result = runner.invoke(list_component_types_command) - assert result.exit_code == 0 + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner): + result = runner.invoke("list", "component-types") + assert_runner_result(result) assert ( - result.output - == "\n".join( - [ - "dagster_components.dbt_project", - "dagster_components.pipes_subprocess_script_collection", - "dagster_components.sling_replication", - ] - ) - + "\n" + result.output.strip() + == textwrap.dedent(""" + dagster_components.test.all_metadata_empty_asset + dagster_components.test.simple_asset + A simple asset that returns a constant string value. 
+ dagster_components.test.simple_pipes_script_asset + A simple asset that runs a Python script with the Pipes subprocess client. + """).strip() ) def test_list_component_types_outside_code_location_fails() -> None: - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(list_component_types_command) - assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + with ProxyRunner.test() as runner, runner.isolated_filesystem(): + result = runner.invoke("list", "component-types") + assert_runner_result(result, exit_0=False) + assert "must be run inside a Dagster code location directory" in result.output def test_list_components_succeeds(): - runner = CliRunner() - # with isolated_example_code_location_bar(runner): - with isolated_example_code_location_bar_with_component_type_baz(runner): - result = runner.invoke(list_components_command) - runner.invoke(generate_component_command, ["bar.baz", "qux"]) - result = runner.invoke(list_components_command) - assert result.output == "qux\n" + with ProxyRunner.test() as runner, isolated_example_code_location_bar(runner): + result = runner.invoke( + "generate", + "component", + "dagster_components.test.all_metadata_empty_asset", + "qux", + ) + assert_runner_result(result) + result = runner.invoke("list", "components") + assert_runner_result(result) + assert ( + result.output.strip() + == textwrap.dedent(""" + qux + """).strip() + ) def test_list_components_command_outside_code_location_fails() -> None: - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(list_components_command) - assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + with ProxyRunner.test() as runner, runner.isolated_filesystem(): + result = runner.invoke("list", "components") + assert_runner_result(result, exit_0=False) + assert "must be run inside a Dagster code location directory" in result.output diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/test_cache.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/test_cache.py new file mode 100644 index 0000000000000..4636259c921fc --- /dev/null +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/test_cache.py @@ -0,0 +1,93 @@ +import subprocess +from pathlib import Path + +import pytest + +from dagster_dg_tests.utils import ( + ProxyRunner, + assert_runner_result, + isolated_example_code_location_bar, +) + + +def test_load_from_cache(): + with ProxyRunner.test(verbose=True) as runner, isolated_example_code_location_bar(runner): + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [miss]" in result.output + assert "CACHE [write]" in result.output + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [hit]" in result.output + + +def test_cache_invalidation_uv_lock(): + with ProxyRunner.test(verbose=True) as runner, isolated_example_code_location_bar(runner): + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [miss]" in result.output + assert "CACHE [write]" in result.output + + subprocess.run(["uv", "add", "dagster-components[dbt]"], check=True) + + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [miss]" in result.output + + +def test_cache_invalidation_modified_lib(): + with ProxyRunner.test(verbose=True) as runner, isolated_example_code_location_bar(runner): + 
result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [miss]" in result.output + assert "CACHE [write]" in result.output + + result = runner.invoke("generate", "component-type", "my_component") + assert_runner_result(result) + + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [miss]" in result.output + + +def test_cache_no_invalidation_modified_pkg(): + with ProxyRunner.test(verbose=True) as runner, isolated_example_code_location_bar(runner): + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [miss]" in result.output + assert "CACHE [write]" in result.output + + Path("bar/submodule.py").write_text("print('hello')") + + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [hit]" in result.output + + +@pytest.mark.parametrize("with_command", [True, False]) +def test_cache_clear(with_command: bool): + with ProxyRunner.test(verbose=True) as runner, isolated_example_code_location_bar(runner): + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE [miss]" in result.output + assert "CACHE [write]" in result.output + + if with_command: + result = runner.invoke("--clear-cache", "list", "component-types") + else: + result = runner.invoke("--clear-cache") + assert_runner_result(result) + result = runner.invoke("list", "component-types") + + assert_runner_result(result) + assert "CACHE [miss]" in result.output + + +def test_cache_disabled(): + with ( + ProxyRunner.test(verbose=True, disable_cache=True) as runner, + isolated_example_code_location_bar(runner), + ): + result = runner.invoke("list", "component-types") + assert_runner_result(result) + assert "CACHE" not in result.output diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/utils.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/utils.py new file mode 100644 index 0000000000000..d3628b5e229ed --- /dev/null +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/utils.py @@ -0,0 +1,107 @@ +import traceback +from contextlib import contextmanager +from dataclasses import dataclass +from pathlib import Path +from tempfile import TemporaryDirectory +from types import TracebackType +from typing import Iterator, Optional, Sequence, Tuple, Type, Union + +from click.testing import CliRunner, Result +from dagster_dg.cli import cli as dg_cli +from dagster_dg.utils import discover_git_root, pushd +from typing_extensions import Self + + +@contextmanager +def isolated_example_deployment_foo(runner: Union[CliRunner, "ProxyRunner"]) -> Iterator[None]: + runner = ProxyRunner(runner) if isinstance(runner, CliRunner) else runner + with runner.isolated_filesystem(): + runner.invoke("generate", "deployment", "foo") + with pushd("foo"): + yield + + +@contextmanager +def isolated_example_code_location_bar( + runner: Union[CliRunner, "ProxyRunner"], in_deployment: bool = True +) -> Iterator[None]: + runner = ProxyRunner(runner) if isinstance(runner, CliRunner) else runner + dagster_git_repo_dir = str(discover_git_root(Path(__file__))) + if in_deployment: + with isolated_example_deployment_foo(runner): + runner.invoke( + "generate", + "code-location", + "--use-editable-dagster", + dagster_git_repo_dir, + "bar", + ) + with pushd("code_locations/bar"): + yield + else: + with runner.isolated_filesystem(): + runner.invoke( + "generate", + "code-location", + "--use-editable-dagster", + dagster_git_repo_dir, + "bar", + ) + 
with pushd("bar"): + yield + + +@dataclass +class ProxyRunner: + original: CliRunner + prepend_args: Optional[Sequence[str]] = None + + @classmethod + @contextmanager + def test( + cls, use_test_component_lib: bool = True, verbose: bool = False, disable_cache: bool = False + ) -> Iterator[Self]: + with TemporaryDirectory() as cache_dir: + prepend_args = [ + *( + ["--builtin-component-lib", "dagster_components.test"] + if use_test_component_lib + else [] + ), + "--cache-dir", + str(cache_dir), + *(["--verbose"] if verbose else []), + *(["--disable-cache"] if disable_cache else []), + ] + yield cls(CliRunner(), prepend_args=prepend_args) + + def invoke(self, *args: str): + all_args = [*(self.prepend_args or []), *args] + return self.original.invoke(dg_cli, all_args) + + @contextmanager + def isolated_filesystem(self) -> Iterator[None]: + with self.original.isolated_filesystem(): + yield + + +def assert_runner_result(result: Result, exit_0: bool = True) -> None: + try: + assert result.exit_code == 0 if exit_0 else result.exit_code != 0 + except AssertionError: + if result.output: + print(result.output) # noqa: T201 + if result.exc_info: + print_exception_info(result.exc_info) + raise + + +def print_exception_info( + exc_info: Tuple[Type[BaseException], BaseException, TracebackType], +) -> None: + """Prints a nicely formatted traceback for the current exception.""" + exc_type, exc_value, exc_traceback = exc_info + print("Exception Traceback (most recent call last):") # noqa: T201 + formatted_traceback = "".join(traceback.format_tb(exc_traceback)) + print(formatted_traceback) # noqa: T201 + print(f"{exc_type.__name__}: {exc_value}") # noqa: T201 diff --git a/python_modules/libraries/dagster-dg/setup.py b/python_modules/libraries/dagster-dg/setup.py index 767da03708ef4..78f487255a4cc 100644 --- a/python_modules/libraries/dagster-dg/setup.py +++ b/python_modules/libraries/dagster-dg/setup.py @@ -46,6 +46,6 @@ def get_version() -> str: ] }, extras_require={ - "test": ["pytest"], + "test": ["click", "pydantic", "pytest"], }, ) diff --git a/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/asset_decorator.py b/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/asset_decorator.py index b36785d8e4940..c4c556290d80c 100644 --- a/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/asset_decorator.py +++ b/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/asset_decorator.py @@ -75,14 +75,17 @@ def dlt_assets( dlt_pipeline (Pipeline): The dlt Pipeline defining the destination parameters. name (Optional[str], optional): The name of the op. group_name (Optional[str], optional): The name of the asset group. - dagster_dlt_translator (DltDagsterTranslator, optional): Customization object for defining asset parameters from dlt resources. + dagster_dlt_translator (DagsterDltTranslator, optional): Customization object for defining asset parameters from dlt resources. Examples: Loading Hubspot data to Snowflake with an auto materialize policy using the dlt verified source: .. 
code-block:: python - class HubspotDltDagsterTranslator(DltDagsterTranslator): + from dagster_embedded_elt.dlt import DagsterDltResource, DagsterDltTranslator, dlt_assets + + + class HubspotDagsterDltTranslator(DagsterDltTranslator): @public def get_auto_materialize_policy(self, resource: DltResource) -> Optional[AutoMaterializePolicy]: return AutoMaterializePolicy.eager().with_rules( @@ -100,15 +103,18 @@ def get_auto_materialize_policy(self, resource: DltResource) -> Optional[AutoMat ), name="hubspot", group_name="hubspot", - dagster_dlt_translator=HubspotDltDagsterTranslator(), + dagster_dlt_translator=HubspotDagsterDltTranslator(), ) - def hubspot_assets(context: AssetExecutionContext, dlt: DltDagsterResource): + def hubspot_assets(context: AssetExecutionContext, dlt: DagsterDltResource): yield from dlt.run(context=context) Loading Github issues to snowflake: .. code-block:: python + from dagster_embedded_elt.dlt import DagsterDltResource, dlt_assets + + @dlt_assets( dlt_source=github_reactions( "dagster-io", "dagster", items_per_page=100, max_items=250 @@ -122,7 +128,7 @@ def hubspot_assets(context: AssetExecutionContext, dlt: DltDagsterResource): name="github", group_name="github", ) - def github_reactions_dagster_assets(context: AssetExecutionContext, dlt: DltDagsterResource): + def github_reactions_dagster_assets(context: AssetExecutionContext, dlt: DagsterDltResource): yield from dlt.run(context=context) """ diff --git a/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/dlt_event_iterator.py b/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/dlt_event_iterator.py index 724791278a30e..f11d56b177906 100644 --- a/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/dlt_event_iterator.py +++ b/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/dlt/dlt_event_iterator.py @@ -1,5 +1,4 @@ -from collections import abc -from typing import Generic, Iterator, Optional, Union +from typing import Iterator, Optional, Union from dagster import AssetMaterialization, MaterializeResult from dagster._annotations import experimental, public @@ -58,7 +57,7 @@ def fetch_row_count_metadata( return TableMetadataSet(row_count=None) -class DltEventIterator(Generic[T], abc.Iterator): +class DltEventIterator(Iterator[T]): """A wrapper around an iterator of Dlt events which contains additional methods for post-processing the events, such as fetching column metadata. """ diff --git a/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/sling/sling_event_iterator.py b/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/sling/sling_event_iterator.py index 55ff4198785eb..9e46a47725c94 100644 --- a/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/sling/sling_event_iterator.py +++ b/python_modules/libraries/dagster-embedded-elt/dagster_embedded_elt/sling/sling_event_iterator.py @@ -1,6 +1,5 @@ import re -from collections import abc -from typing import TYPE_CHECKING, Any, Dict, Generic, Iterator, Optional, Sequence, Union, cast +from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Sequence, Union, cast from dagster import ( AssetMaterialization, @@ -168,7 +167,7 @@ def fetch_column_metadata( return {} -class SlingEventIterator(Generic[T], abc.Iterator): +class SlingEventIterator(Iterator[T]): """A wrapper around an iterator of Sling events which contains additional methods for post-processing the events, such as fetching column metadata. 
""" diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_decorator.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_decorator.py index 12989283941f9..2f8fba289d00b 100644 --- a/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_decorator.py +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_decorator.py @@ -74,10 +74,9 @@ def fivetran_connector_assets(context: dg.AssetExecutionContext, fivetran: Fivet class CustomDagsterFivetranTranslator(DagsterFivetranTranslator): def get_asset_spec(self, props: FivetranConnectorTableProps) -> dg.AssetSpec: - asset_spec = super().get_asset_spec(props) - return replace_attributes( - asset_spec, - key=asset_spec.key.with_prefix("my_prefix"), + default_spec = super().get_asset_spec(props) + return default_spec.replace_attributes( + key=default_spec.key.with_prefix("my_prefix"), ) diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_defs.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_defs.py index f746c6b3728ea..712ff5e8bad60 100644 --- a/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_defs.py +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran/asset_defs.py @@ -791,10 +791,9 @@ def build_fivetran_assets_definitions( class CustomDagsterFivetranTranslator(DagsterFivetranTranslator): def get_asset_spec(self, props: FivetranConnectorTableProps) -> dg.AssetSpec: - asset_spec = super().get_asset_spec(props) - return replace_attributes( - asset_spec, - key=asset_spec.key.with_prefix("my_prefix"), + default_spec = super().get_asset_spec(props) + return default_spec.replace_attributes( + key=default_spec.key.with_prefix("my_prefix"), ) diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran/fivetran_event_iterator.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran/fivetran_event_iterator.py new file mode 100644 index 0000000000000..f1ea98a332760 --- /dev/null +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran/fivetran_event_iterator.py @@ -0,0 +1,155 @@ +import os +from concurrent.futures import ThreadPoolExecutor +from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, Union + +from dagster import ( + AssetExecutionContext, + AssetMaterialization, + MaterializeResult, + OpExecutionContext, + _check as check, +) +from dagster._annotations import experimental, public +from dagster._core.definitions.metadata.metadata_set import TableMetadataSet +from dagster._core.utils import imap +from typing_extensions import TypeVar + +from dagster_fivetran.translator import FivetranMetadataSet +from dagster_fivetran.utils import get_column_schema_for_columns, get_fivetran_connector_table_name + +if TYPE_CHECKING: + from dagster_fivetran.resources import FivetranWorkspace + +FivetranEventType = Union[AssetMaterialization, MaterializeResult] +T = TypeVar("T", bound=FivetranEventType) + +DEFAULT_MAX_THREADPOOL_WORKERS = 10 + + +def _fetch_column_metadata( + materialization: FivetranEventType, + fivetran_workspace: "FivetranWorkspace", +) -> Dict[str, Any]: + """Subroutine to fetch column metadata for a given table from the Fivetran API.""" + materialization_metadata = check.not_none(materialization.metadata) + connector_id = check.not_none( + FivetranMetadataSet.extract(materialization_metadata).connector_id + ) + schema_name = check.not_none( + FivetranMetadataSet.extract(materialization_metadata).destination_schema_name + ) + table_name = check.not_none( + 
FivetranMetadataSet.extract(materialization_metadata).destination_table_name + ) + + client = fivetran_workspace.get_client() + + metadata = {} + try: + table_conn_data = client.get_columns_config_for_table( + connector_id=connector_id, + schema_name=schema_name, + table_name=table_name, + ) + + columns = check.dict_elem(table_conn_data, "columns") + metadata = {**TableMetadataSet(column_schema=get_column_schema_for_columns(columns))} + except Exception as e: + client._log.warning( # noqa + f"An error occurred while fetching column metadata for table " + f"{get_fivetran_connector_table_name(schema_name=schema_name, table_name=table_name)}." + "Column metadata will not be included in the event.\n\n" + f"Exception: {e}", + exc_info=True, + ) + return metadata + + +class FivetranEventIterator(Iterator[T]): + """A wrapper around an iterator of Fivetran events which contains additional methods for + post-processing the events, such as fetching column metadata. + """ + + def __init__( + self, + events: Iterator[T], + fivetran_workspace: "FivetranWorkspace", + context: Union[OpExecutionContext, AssetExecutionContext], + ) -> None: + self._inner_iterator = events + self._fivetran_workspace = fivetran_workspace + self._context = context + + def __next__(self) -> T: + return next(self._inner_iterator) + + def __iter__(self) -> "FivetranEventIterator[T]": + return self + + @experimental + @public + def fetch_column_metadata(self) -> "FivetranEventIterator": + """Fetches column metadata for each table synced with the Fivetran API. + + Retrieves the column schema for each destination table. + + Returns: + FivetranEventIterator: An iterator of Dagster events with column metadata attached. + """ + fetch_metadata_fn: Callable[ + [FivetranEventType], + Dict[str, Any], + ] = lambda materialization: _fetch_column_metadata( + materialization=materialization, + fivetran_workspace=self._fivetran_workspace, + ) + + return self._attach_metadata(fetch_metadata_fn) + + def _attach_metadata( + self, + fn: Callable[[FivetranEventType], Dict[str, Any]], + ) -> "FivetranEventIterator": + """Runs a threaded task to attach metadata to each event in the iterator. + + Args: + fn (Callable[[Union[AssetMaterialization, MaterializeResult]], Dict[str, Any]]): + A function which takes a FivetranEventType and returns + a dictionary of metadata to attach to the event. + + Returns: + Iterator[Union[AssetMaterialization, MaterializeResult]]: + A set of corresponding Dagster events for Fivetran tables, with any metadata output + by the function attached, yielded in the order they are emitted by the Fivetran API. 
+ """ + + def _map_fn(event: FivetranEventType) -> FivetranEventType: + return event._replace(metadata={**check.is_dict(event.metadata), **fn(event)}) + + def _threadpool_wrap_map_fn() -> Iterator[FivetranEventType]: + assets_def = self._context.assets_def + connector_id = next( + check.not_none(FivetranMetadataSet.extract(spec.metadata).connector_id) + for spec in assets_def.specs + ) + + with ThreadPoolExecutor( + max_workers=int( + os.getenv( + "FIVETRAN_POSTPROCESSING_THREADPOOL_WORKERS", + default=DEFAULT_MAX_THREADPOOL_WORKERS, + ) + ), + thread_name_prefix=f"fivetran_{connector_id}", + ) as executor: + yield from imap( + executor=executor, + iterable=self._inner_iterator, + func=_map_fn, + ) + + return FivetranEventIterator( + events=_threadpool_wrap_map_fn(), + fivetran_workspace=self._fivetran_workspace, + context=self._context, + ) diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran/resources.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran/resources.py index 01936211cc2ed..82b36a45ad1bd 100644 --- a/python_modules/libraries/dagster-fivetran/dagster_fivetran/resources.py +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran/resources.py @@ -4,7 +4,7 @@ import time from datetime import datetime, timedelta from functools import partial -from typing import Any, Callable, Iterator, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Mapping, Optional, Sequence, Tuple, Union from urllib.parse import urljoin import requests @@ -30,10 +30,11 @@ from dagster._record import as_dict, record from dagster._utils.cached_method import cached_method from dagster._vendored.dateutil import parser -from pydantic import Field, PrivateAttr +from pydantic import Field from requests.auth import HTTPBasicAuth from requests.exceptions import RequestException +from dagster_fivetran.fivetran_event_iterator import FivetranEventIterator from dagster_fivetran.translator import ( DagsterFivetranTranslator, FivetranConnector, @@ -858,8 +859,6 @@ class FivetranWorkspace(ConfigurableResource): ), ) - _client: FivetranClient = PrivateAttr(default=None) - @cached_method def get_client(self) -> FivetranClient: return FivetranClient( @@ -969,11 +968,11 @@ def _generate_materialization( schema_config_details=fivetran_output.schema_config ) - for schema_source_name, schema in schema_config.schemas.items(): + for schema in schema_config.schemas.values(): if not schema.enabled: continue - for table_source_name, table in schema.tables.items(): + for table in schema.tables.values(): if not table.enabled: continue @@ -1006,14 +1005,17 @@ def _generate_materialization( schema=schema.name_in_destination, table=table.name_in_destination, ), - "schema_source_name": schema_source_name, - "table_source_name": table_source_name, + **FivetranMetadataSet( + connector_id=connector.id, + destination_schema_name=schema.name_in_destination, + destination_table_name=table.name_in_destination, + ), }, ) def sync_and_poll( self, context: Union[OpExecutionContext, AssetExecutionContext] - ) -> Iterator[Union[AssetMaterialization, MaterializeResult]]: + ) -> FivetranEventIterator[Union[AssetMaterialization, MaterializeResult]]: """Executes a sync and poll process to materialize Fivetran assets. Args: @@ -1025,9 +1027,13 @@ def sync_and_poll( Iterator[Union[AssetMaterialization, MaterializeResult]]: An iterator of MaterializeResult or AssetMaterialization. 
""" + return FivetranEventIterator( + events=self._sync_and_poll(context=context), fivetran_workspace=self, context=context + ) + + def _sync_and_poll(self, context: Union[OpExecutionContext, AssetExecutionContext]): assets_def = context.assets_def dagster_fivetran_translator = get_translator_from_fivetran_assets(assets_def) - connector_id = next( check.not_none(FivetranMetadataSet.extract(spec.metadata).connector_id) for spec in assets_def.specs diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran/translator.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran/translator.py index 3a35d335703b5..054b6cee51301 100644 --- a/python_modules/libraries/dagster-fivetran/dagster_fivetran/translator.py +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran/translator.py @@ -251,6 +251,8 @@ def to_fivetran_connector_table_props_data(self) -> Sequence[FivetranConnectorTa class FivetranMetadataSet(NamespacedMetadataSet): connector_id: Optional[str] = None + destination_schema_name: Optional[str] = None + destination_table_name: Optional[str] = None @classmethod def namespace(cls) -> str: @@ -284,7 +286,14 @@ def get_asset_spec(self, props: FivetranConnectorTableProps) -> AssetSpec: table=table_name, ) - augmented_metadata = {**metadata, **FivetranMetadataSet(connector_id=props.connector_id)} + augmented_metadata = { + **metadata, + **FivetranMetadataSet( + connector_id=props.connector_id, + destination_schema_name=schema_name, + destination_table_name=table_name, + ), + } return AssetSpec( key=AssetKey(props.table.split(".")), diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran/utils.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran/utils.py index 13c67b76135b0..0ec34676aadd8 100644 --- a/python_modules/libraries/dagster-fivetran/dagster_fivetran/utils.py +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran/utils.py @@ -61,15 +61,7 @@ def metadata_for_table( table_name = None if table_data.get("columns"): columns = check.dict_elem(table_data, "columns") - table_columns = sorted( - [ - TableColumn(name=col["name_in_destination"], type="") - for col in columns.values() - if "name_in_destination" in col and col.get("enabled") - ], - key=lambda col: col.name, - ) - column_schema = TableSchema(columns=table_columns) + column_schema = get_column_schema_for_columns(columns=columns) if include_column_info: metadata["column_info"] = MetadataValue.json(columns) @@ -84,6 +76,18 @@ def metadata_for_table( return metadata +def get_column_schema_for_columns(columns: Mapping[str, Any]): + table_columns = sorted( + [ + TableColumn(name=col["name_in_destination"], type="") + for col in columns.values() + if "name_in_destination" in col and col.get("enabled") + ], + key=lambda col: col.name, + ) + return TableSchema(columns=table_columns) + + def _table_data_to_materialization( fivetran_output: FivetranOutput, asset_key_prefix: Sequence[str], diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran_tests/experimental/conftest.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran_tests/experimental/conftest.py index 76df67e6416e3..a28903099a709 100644 --- a/python_modules/libraries/dagster-fivetran/dagster_fivetran_tests/experimental/conftest.py +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran_tests/experimental/conftest.py @@ -20,6 +20,8 @@ TEST_SCHEMA_NAME = "schema_name_in_destination_1" TEST_TABLE_NAME = "table_name_in_destination_1" +TEST_SECOND_SCHEMA_NAME = "schema_name_in_destination_2" 
+TEST_SECOND_TABLE_NAME = "table_name_in_destination_2" TEST_ANOTHER_TABLE_NAME = "another_table_name_in_destination_1" # Taken from Fivetran API documentation @@ -343,7 +345,7 @@ def get_sample_schema_config_for_connector(table_name: str) -> Mapping[str, Any] "is_primary_key": True, }, "property2": { - "name_in_destination": "column_name_in_destination_1", + "name_in_destination": "column_name_in_destination_2", "enabled": True, "hashed": False, "enabled_patch_settings": { diff --git a/python_modules/libraries/dagster-fivetran/dagster_fivetran_tests/experimental/test_columns_metadata.py b/python_modules/libraries/dagster-fivetran/dagster_fivetran_tests/experimental/test_columns_metadata.py new file mode 100644 index 0000000000000..5550a82c9bce3 --- /dev/null +++ b/python_modules/libraries/dagster-fivetran/dagster_fivetran_tests/experimental/test_columns_metadata.py @@ -0,0 +1,144 @@ +import re +from unittest.mock import MagicMock + +import pytest +import responses +from dagster import AssetExecutionContext, AssetKey, TableColumn, TableSchema +from dagster._config.field_utils import EnvVar +from dagster._core.definitions.materialize import materialize +from dagster._core.definitions.metadata import TableMetadataSet +from dagster._core.definitions.metadata.table import TableColumnConstraints, TableConstraints +from dagster._core.test_utils import environ +from dagster_fivetran import FivetranWorkspace, fivetran_assets + +from dagster_fivetran_tests.experimental.conftest import ( + SAMPLE_SOURCE_TABLE_COLUMNS_CONFIG, + TEST_ACCOUNT_ID, + TEST_API_KEY, + TEST_API_SECRET, + TEST_SCHEMA_NAME, + TEST_SECOND_SCHEMA_NAME, + TEST_SECOND_TABLE_NAME, + TEST_TABLE_NAME, + get_fivetran_connector_api_url, +) + + +def test_column_schema( + connector_id: str, + fetch_workspace_data_api_mocks: responses.RequestsMock, + sync_and_poll: MagicMock, + capsys: pytest.CaptureFixture, +) -> None: + with environ({"FIVETRAN_API_KEY": TEST_API_KEY, "FIVETRAN_API_SECRET": TEST_API_SECRET}): + test_connector_api_url = get_fivetran_connector_api_url(connector_id) + for schema_name, table_name in [ + (TEST_SCHEMA_NAME, TEST_TABLE_NAME), + (TEST_SCHEMA_NAME, TEST_SECOND_TABLE_NAME), + (TEST_SECOND_SCHEMA_NAME, TEST_TABLE_NAME), + (TEST_SECOND_SCHEMA_NAME, TEST_SECOND_TABLE_NAME), + ]: + fetch_workspace_data_api_mocks.add( + method=responses.GET, + url=f"{test_connector_api_url}/schemas/{schema_name}/tables/{table_name}/columns", + json=SAMPLE_SOURCE_TABLE_COLUMNS_CONFIG, + status=200, + ) + + workspace = FivetranWorkspace( + account_id=TEST_ACCOUNT_ID, + api_key=EnvVar("FIVETRAN_API_KEY"), + api_secret=EnvVar("FIVETRAN_API_SECRET"), + ) + + @fivetran_assets(connector_id=connector_id, workspace=workspace, name=connector_id) + def my_fivetran_assets(context: AssetExecutionContext, fivetran: FivetranWorkspace): + yield from fivetran.sync_and_poll(context=context).fetch_column_metadata() + + for schema_name, table_name in [ + (TEST_SCHEMA_NAME, TEST_TABLE_NAME), + (TEST_SCHEMA_NAME, TEST_SECOND_TABLE_NAME), + (TEST_SECOND_SCHEMA_NAME, TEST_TABLE_NAME), + (TEST_SECOND_SCHEMA_NAME, TEST_SECOND_TABLE_NAME), + ]: + table_spec = my_fivetran_assets.get_asset_spec( + AssetKey( + [ + schema_name, + table_name, + ] + ) + ) + spec_table_schema = TableMetadataSet.extract(table_spec.metadata).column_schema + + expected_spec_table_schema = TableSchema( + columns=[ + TableColumn( + name="column_name_in_destination_1", + type="", + description=None, + constraints=TableColumnConstraints(nullable=True, unique=False, other=[]), + tags={}, + ), 
+ TableColumn( + name="column_name_in_destination_2", + type="", + description=None, + constraints=TableColumnConstraints(nullable=True, unique=False, other=[]), + tags={}, + ), + ], + constraints=TableConstraints(other=[]), + ) + + assert spec_table_schema == expected_spec_table_schema + + result = materialize( + [my_fivetran_assets], + resources={"fivetran": workspace}, + ) + assert result.success + + for schema_name, table_name in [ + (TEST_SCHEMA_NAME, TEST_TABLE_NAME), + (TEST_SCHEMA_NAME, TEST_SECOND_TABLE_NAME), + (TEST_SECOND_SCHEMA_NAME, TEST_TABLE_NAME), + (TEST_SECOND_SCHEMA_NAME, TEST_SECOND_TABLE_NAME), + ]: + table_schema_by_asset_key = { + event.materialization.asset_key: TableMetadataSet.extract( + event.materialization.metadata + ).column_schema + for event in result.get_asset_materialization_events() + if event.materialization.asset_key + == AssetKey( + [ + schema_name, + table_name, + ] + ) + } + expected_table_schema_by_asset_key = { + AssetKey( + [ + schema_name, + table_name, + ] + ): TableSchema( + columns=[ + TableColumn("column_name_in_destination_1", type=""), + TableColumn("column_name_in_destination_2", type=""), + ] + ), + } + + assert table_schema_by_asset_key == expected_table_schema_by_asset_key + + captured = capsys.readouterr() + # If an exception occurs in fetch_column_metadata, + # a message is logged as a warning and the exception is not raised. + # We test that this message is not in the logs. + assert not re.search( + r"dagster - WARNING - (?s:.)+ - An error occurred while fetching column metadata for table", + captured.err, + ) diff --git a/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/configs.py b/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/configs.py index d969d7f820823..ca94099613767 100644 --- a/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/configs.py +++ b/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/configs.py @@ -1,4 +1,4 @@ -from dagster import Field, StringSource +from dagster import Field, Permissive, StringSource from dagster_gcp.dataproc.configs_dataproc_cluster import define_dataproc_cluster_config from dagster_gcp.dataproc.configs_dataproc_job import define_dataproc_job_config @@ -11,12 +11,23 @@ def define_dataproc_create_cluster_config(): Names of deleted clusters can be reused.""", is_required=True, ) + labels = Field( + Permissive(), + description="""Optional. The labels to associate with this cluster. Label keys must + contain 1 to 63 characters, and must conform to RFC 1035 + (https://www.ietf.org/rfc/rfc1035.txt). Label values may be empty, but, if + present, must contain 1 to 63 characters, and must conform to RFC 1035 + (https://www.ietf.org/rfc/rfc1035.txt). 
No more than 32 labels can be associated + with a cluster.""", + is_required=False, + ) return { "projectId": _define_project_id_config(), "region": _define_region_config(), "clusterName": cluster_name, "cluster_config": define_dataproc_cluster_config(), + "labels": labels, } diff --git a/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/resources.py b/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/resources.py index bd739f7197621..35db40399f0ea 100644 --- a/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/resources.py +++ b/python_modules/libraries/dagster-gcp/dagster_gcp/dataproc/resources.py @@ -33,8 +33,9 @@ def __init__(self, config): self.config = config - (self.project_id, self.region, self.cluster_name, self.cluster_config) = ( - self.config.get(k) for k in ("projectId", "region", "clusterName", "cluster_config") + (self.project_id, self.region, self.cluster_name, self.cluster_config, self.labels) = ( + self.config.get(k) + for k in ("projectId", "region", "clusterName", "cluster_config", "labels") ) @property @@ -60,6 +61,7 @@ def create_cluster(self): "projectId": self.project_id, "clusterName": self.cluster_name, "config": self.cluster_config, + "labels": self.labels, }, ).execute() ) @@ -177,6 +179,17 @@ def my_asset(dataproc: DataprocResource): " deleted clusters can be reused." ) ) + labels: Optional[dict[str, str]] = Field( + default=None, + description=( + "Optional. The labels to associate with this cluster. Label keys must" + " contain 1 to 63 characters, and must conform to RFC 1035" + " (https://www.ietf.org/rfc/rfc1035.txt). Label values may be empty, but, if" + " present, must contain 1 to 63 characters, and must conform to RFC 1035" + " (https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be associated" + " with a cluster." + ), + ) cluster_config_yaml_path: Optional[str] = Field( default=None, description=( @@ -249,6 +262,7 @@ def get_client(self) -> DataprocClient: "region": self.region, "clusterName": self.cluster_name, "cluster_config": cluster_config, + "labels": self.labels, } return DataprocClient(config=client_config_dict) diff --git a/python_modules/libraries/dagster-gcp/dagster_gcp_tests/dataproc_tests/test_resources.py b/python_modules/libraries/dagster-gcp/dagster_gcp_tests/dataproc_tests/test_resources.py index c1ff37fb269da..9cf0b1ea2147a 100644 --- a/python_modules/libraries/dagster-gcp/dagster_gcp_tests/dataproc_tests/test_resources.py +++ b/python_modules/libraries/dagster-gcp/dagster_gcp_tests/dataproc_tests/test_resources.py @@ -22,6 +22,7 @@ DATAPROC_CLUSTERS_URI = f"{DATAPROC_BASE_URI}/clusters" DATAPROC_JOBS_URI = f"{DATAPROC_BASE_URI}/jobs" DATAPROC_SCHEMA_URI = "https://www.googleapis.com/discovery/v1/apis/dataproc/v1/rest" +DATAPROC_LABELS = {"first_label": "true", "second_label": "true"} EXPECTED_RESULTS = [ # OAuth authorize credentials @@ -239,6 +240,59 @@ def test_dataproc(): assert result.success +@pytest.mark.integration +def test_dataproc_resource_labels(): + """Tests pydantic dataproc cluster creation/deletion. Requests are captured by the responses library, so + no actual HTTP requests are made here. 
+ + Note that inspecting the HTTP requests can be useful for debugging, which can be done by adding: + + import httplib2 + httplib2.debuglevel = 4 + """ + with mock.patch("httplib2.Http", new=HttpSnooper): + + @job + def test_dataproc(): + configurable_dataproc_op() + + result = test_dataproc.execute_in_process( + run_config=RunConfig( + ops={ + "configurable_dataproc_op": DataprocOpConfig( + job_scoped_cluster=True, + project_id=PROJECT_ID, + region=REGION, + job_config={ + "reference": {"projectId": PROJECT_ID}, + "placement": {"clusterName": CLUSTER_NAME}, + "hiveJob": {"queryList": {"queries": ["SHOW DATABASES"]}}, + }, + ) + }, + ), + resources={ + "dataproc": DataprocResource( + project_id=PROJECT_ID, + cluster_name=CLUSTER_NAME, + region=REGION, + labels=DATAPROC_LABELS, + cluster_config_dict={ + "softwareConfig": { + "properties": { + # Create a single-node cluster + # This needs to be the string "true" when + # serialized, not a boolean true + "dataproc:dataproc.allow.zero.workers": "true" + } + } + }, + ) + }, + ) + assert result.success + + @pytest.mark.integration def test_wait_for_job_with_timeout_pydantic(): """Test submitting a job with timeout of 0 second so that it always fails.""" diff --git a/python_modules/libraries/dagster-ge/dagster_ge/factory.py b/python_modules/libraries/dagster-ge/dagster_ge/factory.py index bf42e6e8a5310..b0313bc4d88df 100644 --- a/python_modules/libraries/dagster-ge/dagster_ge/factory.py +++ b/python_modules/libraries/dagster-ge/dagster_ge/factory.py @@ -27,7 +27,7 @@ class GEContextResource(ConfigurableResource, IAttachDifferentObjectToOpContext): - ge_root_dir: str = Field( + ge_root_dir: Optional[str] = Field( default=None, description="The root directory for your Great Expectations project.", ) diff --git a/python_modules/libraries/dagster-k8s/dagster_k8s/client.py b/python_modules/libraries/dagster-k8s/dagster_k8s/client.py index 26c543f26f1e9..fbf2dfce2484c 100644 --- a/python_modules/libraries/dagster-k8s/dagster_k8s/client.py +++ b/python_modules/libraries/dagster-k8s/dagster_k8s/client.py @@ -673,7 +673,6 @@ def wait_for_pod( # State checks below, see: # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#containerstate-v1-core state = container_status.state - if state.running is not None: if wait_for_state == WaitForPodState.Ready: # ready is boolean field of container status @@ -684,6 +683,11 @@ def wait_for_pod( continue else: ready_containers.add(container_status.name) + if container_status.name in initcontainers: + self.logger( + f'Init container "{container_status.name}" is ready, waiting for non-init containers...' 
+ ) + continue if initcontainers.issubset(exited_containers | ready_containers): self.logger(f'Pod "{pod_name}" is ready, done waiting') break diff --git a/python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_client.py b/python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_client.py index ad754885590bf..983a6d5707c2f 100644 --- a/python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_client.py +++ b/python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_client.py @@ -427,7 +427,10 @@ def test_retrieve_pod_logs(): assert mock_client.retrieve_pod_logs("pod", "namespace") == "a_string" -def _pod_list_for_container_status(*container_statuses, init_container_statuses=None): +def _pod_list_for_container_status( + *container_statuses, + init_container_statuses=None, +): return V1PodList( items=[ V1Pod( @@ -805,23 +808,42 @@ def test_waiting_for_pod_initialize_with_ignored_containers(): def test_waiting_for_pod_initialize_with_init_container(): - mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=3)) + mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=4)) waiting_container_status = _create_status( state=V1ContainerState( waiting=V1ContainerStateWaiting(reason=KubernetesWaitingReasons.PodInitializing) ), ready=False, ) + waiting_initcontainer_status = _create_status( + name="initcontainer", + state=V1ContainerState( + waiting=V1ContainerStateWaiting(reason=KubernetesWaitingReasons.PodInitializing) + ), + ready=False, + ) + + ready_initcontainer_status = _ready_running_status(name="initcontainer") + + # Initial status single_waiting_pod = _pod_list_for_container_status( - waiting_container_status, init_container_statuses=[waiting_container_status] + waiting_container_status, init_container_statuses=[waiting_initcontainer_status] + ) + + # Init container is ready, but not the main container + single_only_init_ready_pod = _pod_list_for_container_status( + waiting_container_status, init_container_statuses=[ready_initcontainer_status] ) + + # Main container is ready single_ready_running_pod = _pod_list_for_container_status( - waiting_container_status, init_container_statuses=[_ready_running_status()] + _ready_running_status(), init_container_statuses=[ready_initcontainer_status] ) mock_client.core_api.list_namespaced_pod.side_effect = [ single_waiting_pod, single_waiting_pod, + single_only_init_ready_pod, single_ready_running_pod, ] @@ -833,6 +855,7 @@ def test_waiting_for_pod_initialize_with_init_container(): [ f'Waiting for pod "{pod_name}"', f'Waiting for pod "{pod_name}" to initialize...', + f'Init container "{ready_initcontainer_status.name}" is ready, waiting for non-init containers...', f'Pod "{pod_name}" is ready, done waiting', ], ) @@ -841,7 +864,7 @@ def test_waiting_for_pod_initialize_with_init_container(): def test_wait_for_pod_initialize_with_multiple_init_containers(): - mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=4)) + mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=5)) waiting_container_status = _create_status( state=V1ContainerState( waiting=V1ContainerStateWaiting(reason=KubernetesWaitingReasons.PodInitializing) @@ -878,11 +901,17 @@ def test_wait_for_pod_initialize_with_multiple_init_containers(): init_container_statuses=[ready_initcontainer1_status, ready_initcontainer2_status], ) + all_ready_pod = _pod_list_for_container_status( + _ready_running_status(), + 
init_container_statuses=[ready_initcontainer1_status, ready_initcontainer2_status], + ) + mock_client.core_api.list_namespaced_pod.side_effect = [ two_waiting_inits_pod, two_waiting_inits_pod, single_init_ready_waiting_pod, both_init_ready_waiting_pod, + all_ready_pod, ] pod_name = "a_pod" @@ -893,6 +922,8 @@ def test_wait_for_pod_initialize_with_multiple_init_containers(): [ f'Waiting for pod "{pod_name}"', f'Waiting for pod "{pod_name}" to initialize...', + f'Init container "{ready_initcontainer1_status.name}" is ready, waiting for non-init containers...', + f'Init container "{ready_initcontainer2_status.name}" is ready, waiting for non-init containers...', f'Pod "{pod_name}" is ready, done waiting', ], ) @@ -901,7 +932,7 @@ def test_wait_for_pod_initialize_with_multiple_init_containers(): # Container states are evaluated in the order that they are in the pod manifest, but # it's possible that the second initcontainer can finish first and we test that here. def test_wait_for_pod_initialize_with_multiple_init_containers_backwards(): - mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=5)) + mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=6)) waiting_container_status = _create_status( state=V1ContainerState( waiting=V1ContainerStateWaiting(reason=KubernetesWaitingReasons.PodInitializing) @@ -937,6 +968,10 @@ def test_wait_for_pod_initialize_with_multiple_init_containers_backwards(): waiting_container_status, init_container_statuses=[ready_initcontainer1_status, ready_initcontainer2_status], ) + all_ready_pod = _pod_list_for_container_status( + _ready_running_status(), + init_container_statuses=[ready_initcontainer1_status, ready_initcontainer2_status], + ) # we need an extra side effect here compared to the above test since # there's an extra loop iteration @@ -946,17 +981,19 @@ def test_wait_for_pod_initialize_with_multiple_init_containers_backwards(): single_init_ready_waiting_pod, both_init_ready_waiting_pod, both_init_ready_waiting_pod, + all_ready_pod, ] pod_name = "a_pod" mock_client.wait_for_pod(pod_name=pod_name, namespace="namespace") - assert_logger_calls( mock_client.logger, [ f'Waiting for pod "{pod_name}"', f'Waiting for pod "{pod_name}" to initialize...', f'Waiting for pod "{pod_name}" to initialize...', + f'Init container "{ready_initcontainer1_status.name}" is ready, waiting for non-init containers...', + f'Init container "{ready_initcontainer2_status.name}" is ready, waiting for non-init containers...', f'Pod "{pod_name}" is ready, done waiting', ], ) @@ -964,7 +1001,7 @@ def test_wait_for_pod_initialize_with_multiple_init_containers_backwards(): # init containers may terminate quickly, so a ready state is never observed def test_wait_for_pod_initialize_with_fast_init_containers(): - mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=5)) + mock_client = create_mocked_client(timer=create_timing_out_timer(num_good_ticks=6)) waiting_container_status = _create_status( state=V1ContainerState( waiting=V1ContainerStateWaiting(reason=KubernetesWaitingReasons.PodInitializing) @@ -1007,12 +1044,20 @@ def test_wait_for_pod_initialize_with_fast_init_containers(): ready_initcontainer_slow_status, ], ) + ready_pod = _pod_list_for_container_status( + _ready_running_status(), + init_container_statuses=[ + terminated_initcontainer_fast_status, + ready_initcontainer_slow_status, + ], + ) mock_client.core_api.list_namespaced_pod.side_effect = [ two_waiting_inits_pod, two_waiting_inits_pod, 
term_and_ready_waiting_pod, term_and_ready_waiting_pod, + ready_pod, ] pod_name = "a_pod" @@ -1024,6 +1069,7 @@ def test_wait_for_pod_initialize_with_fast_init_containers(): f'Waiting for pod "{pod_name}"', f'Waiting for pod "{pod_name}" to initialize...', "Init container init_fast in a_pod has exited successfully", + f'Init container "{ready_initcontainer_slow_status.name}" is ready, waiting for non-init containers...', f'Pod "{pod_name}" is ready, done waiting', ], ) diff --git a/python_modules/libraries/dagster-prometheus/dagster_prometheus/resources.py b/python_modules/libraries/dagster-prometheus/dagster_prometheus/resources.py index da9c1f202d493..993fccede5ddd 100644 --- a/python_modules/libraries/dagster-prometheus/dagster_prometheus/resources.py +++ b/python_modules/libraries/dagster-prometheus/dagster_prometheus/resources.py @@ -46,7 +46,7 @@ def my_job(): default=30, description="is how long delete will attempt to connect before giving up. Defaults to 30s.", ) - _registry: prometheus_client.CollectorRegistry = PrivateAttr(default=None) + _registry: prometheus_client.CollectorRegistry = PrivateAttr(default=None) # type: ignore @classmethod def _is_dagster_maintained(cls) -> bool: diff --git a/python_modules/libraries/dagster-pyspark/dagster_pyspark/resources.py b/python_modules/libraries/dagster-pyspark/dagster_pyspark/resources.py index 9a3c34df7f0d9..71fb888515d6b 100644 --- a/python_modules/libraries/dagster-pyspark/dagster_pyspark/resources.py +++ b/python_modules/libraries/dagster-pyspark/dagster_pyspark/resources.py @@ -132,12 +132,12 @@ def _init_session(self) -> None: @property def spark_session(self) -> Any: self._init_session() - return self._spark_session + return check.not_none(self._spark_session) @property def spark_context(self) -> Any: self._init_session() - return self._spark_session.sparkContext + return check.not_none(self._spark_session).sparkContext @dagster_maintained_resource diff --git a/python_modules/libraries/dagster-shell/dagster_shell/ops.py b/python_modules/libraries/dagster-shell/dagster_shell/ops.py index d22d49b41b1c3..ae0cbf32c6eb3 100644 --- a/python_modules/libraries/dagster-shell/dagster_shell/ops.py +++ b/python_modules/libraries/dagster-shell/dagster_shell/ops.py @@ -36,7 +36,7 @@ class ShellOpConfig(Config): description="An optional dict of environment variables to pass to the subprocess.", ) output_logging: OutputType = Field( - default=OutputType.BUFFER.value, + default=OutputType.BUFFER.value, # type: ignore ) cwd: Optional[str] = Field( default=None, description="Working directory in which to execute shell script" diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma/__init__.py b/python_modules/libraries/dagster-sigma/dagster_sigma/__init__.py index eed90af5624b8..944e51de4e4c2 100644 --- a/python_modules/libraries/dagster-sigma/dagster_sigma/__init__.py +++ b/python_modules/libraries/dagster-sigma/dagster_sigma/__init__.py @@ -1,5 +1,8 @@ from dagster._core.libraries import DagsterLibraryRegistry +from dagster_sigma.assets import ( + build_materialize_workbook_assets_definition as build_materialize_workbook_assets_definition, +) from dagster_sigma.resource import ( SigmaBaseUrl as SigmaBaseUrl, SigmaFilter as SigmaFilter, diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma/assets.py b/python_modules/libraries/dagster-sigma/dagster_sigma/assets.py new file mode 100644 index 0000000000000..8f7ff644e5d6e --- /dev/null +++ b/python_modules/libraries/dagster-sigma/dagster_sigma/assets.py @@ -0,0 +1,38 @@ +from 
typing import cast + +from dagster import AssetExecutionContext, AssetsDefinition, AssetSpec, multi_asset +from dagster._annotations import experimental + + +@experimental +def build_materialize_workbook_assets_definition( + resource_key: str, + spec: AssetSpec, +) -> AssetsDefinition: + """Returns an AssetsDefinition which will, when materialized, + run all materialization schedules for the targeted Sigma workbook. + Note that this will not update portions of a workbook which are not + assigned to a materialization schedule. + + For more information, see + https://help.sigmacomputing.com/docs/materialization#create-materializations-in-workbooks + + Args: + resource_key (str): The resource key to use for the Sigma resource. + spec (AssetSpec): The asset spec of the Sigma workbook. + + Returns: + AssetsDefinition: The AssetsDefinition which rebuilds a Sigma workbook. + """ + from dagster_sigma import SigmaOrganization + + @multi_asset( + name=f"sigma_materialize_{spec.key.to_python_identifier()}", + specs=[spec], + required_resource_keys={resource_key}, + ) + def asset_fn(context: AssetExecutionContext): + sigma = cast(SigmaOrganization, getattr(context.resources, resource_key)) + yield from sigma.run_materializations_for_workbook(spec) + + return asset_fn diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma/resource.py b/python_modules/libraries/dagster-sigma/dagster_sigma/resource.py index 733be0d2f29fd..55d20e57968e9 100644 --- a/python_modules/libraries/dagster-sigma/dagster_sigma/resource.py +++ b/python_modules/libraries/dagster-sigma/dagster_sigma/resource.py @@ -1,6 +1,8 @@ import asyncio import contextlib +import enum import os +import time import urllib.parse import warnings from collections import defaultdict @@ -30,6 +32,7 @@ from dagster._core.definitions.asset_spec import AssetSpec from dagster._core.definitions.definitions_class import Definitions from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader +from dagster._core.definitions.events import AssetMaterialization from dagster._core.definitions.repository_definition.repository_definition import RepositoryLoadData from dagster._record import IHaveNew, record_custom from dagster._serdes.serdes import deserialize_value @@ -45,6 +48,7 @@ SigmaOrganizationData, SigmaTable, SigmaWorkbook, + SigmaWorkbookMetadataSet, _inode_from_url, ) @@ -53,6 +57,12 @@ logger = get_dagster_logger("dagster_sigma") +class SigmaMaterializationStatus(str, enum.Enum): + PENDING = "pending" + BUILDING = "building" + READY = "ready" + + @record_custom class SigmaFilter(IHaveNew): """Filters the set of Sigma objects to fetch. 
@@ -162,6 +172,30 @@ async def _fetch_json_async( response.raise_for_status() return await response.json() + def _fetch_json( + self, + endpoint: str, + method: str = "GET", + query_params: Optional[Dict[str, Any]] = None, + json: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + url = f"{self.base_url}/v2/{endpoint}" + if query_params: + url = f"{url}?{urllib.parse.urlencode(query_params)}" + + response = requests.request( + method=method, + url=url, + headers={ + "Accept": "application/json", + "Authorization": f"Bearer {self.api_token}", + **SIGMA_PARTNER_ID_TAG, + }, + json=json, + ) + response.raise_for_status() + return response.json() + async def _fetch_json_async_paginated_entries( self, endpoint: str, query_params: Optional[Dict[str, Any]] = None, limit: int = 1000 ) -> List[Dict[str, Any]]: @@ -255,6 +289,86 @@ def try_except_http_warn(self, should_catch: bool, msg: str) -> Iterator[None]: else: raise + def _begin_workbook_materialization(self, workbook_id: str, sheet_id: str) -> str: + output = self._fetch_json( + f"workbooks/{workbook_id}/materializations", + method="POST", + json={"sheetId": sheet_id}, + ) + return output["materializationId"] + + def _fetch_materialization_status( + self, workbook_id: str, materialization_id: str + ) -> Dict[str, Any]: + return self._fetch_json(f"workbooks/{workbook_id}/materializations/{materialization_id}") + + def _run_materializations_for_workbook( + self, workbook_id: str, sheet_ids: AbstractSet[str] + ) -> None: + materialization_id_to_sheet = dict( + zip( + [ + self._begin_workbook_materialization(workbook_id, sheet_id) + for sheet_id in sheet_ids + ], + sheet_ids, + ) + ) + remaining_materializations = set(materialization_id_to_sheet.keys()) + + successful_sheets = set() + failed_sheets = set() + + while remaining_materializations: + materialization_statuses = [ + self._fetch_materialization_status(workbook_id, materialization_id) + for materialization_id in remaining_materializations + ] + for status in materialization_statuses: + if status["status"] not in ( + SigmaMaterializationStatus.PENDING, + SigmaMaterializationStatus.BUILDING, + ): + remaining_materializations.remove(status["materializationId"]) + if status["status"] == SigmaMaterializationStatus.READY: + successful_sheets.add( + materialization_id_to_sheet[status["materializationId"]] + ) + else: + failed_sheets.add(materialization_id_to_sheet[status["materializationId"]]) + + time.sleep(5) + + if failed_sheets: + if successful_sheets: + raise Exception( + f"Materializations for sheets {', '.join(failed_sheets)} failed for workbook {workbook_id}" + f", materializations for sheets {', '.join(successful_sheets)} succeeded." + ) + else: + raise Exception( + f"Materializations for sheets {', '.join(failed_sheets)} failed for workbook {workbook_id}" + ) + + def run_materializations_for_workbook( + self, workbook_spec: AssetSpec + ) -> Iterator[AssetMaterialization]: + """Runs all scheduled materializations for a workbook. + + See https://help.sigmacomputing.com/docs/materialization#create-materializations-in-workbooks + for more information. 
+ """ + metadata = SigmaWorkbookMetadataSet.extract(workbook_spec.metadata) + workbook_id = metadata.workbook_id + materialization_schedules = check.is_list( + check.not_none(metadata.materialization_schedules).value + ) + + materialization_sheets = {schedule["sheetId"] for schedule in materialization_schedules} + + self._run_materializations_for_workbook(workbook_id, materialization_sheets) + yield (AssetMaterialization(asset_key=workbook_spec.key)) + @cached_method async def _fetch_dataset_upstreams_by_inode( self, sigma_filter: SigmaFilter @@ -386,6 +500,14 @@ async def build_member_id_to_email_mapping(self) -> Mapping[str, str]: members = (await self._fetch_json_async("members", query_params={"limit": 500}))["entries"] return {member["memberId"]: member["email"] for member in members} + @cached_method + async def _fetch_materialization_schedules_for_workbook( + self, workbook_id: str + ) -> List[Dict[str, Any]]: + return await self._fetch_json_async_paginated_entries( + f"workbooks/{workbook_id}/materialization-schedules" + ) + async def load_workbook_data(self, raw_workbook_data: Dict[str, Any]) -> SigmaWorkbook: dataset_deps = set() direct_table_deps = set() @@ -430,12 +552,17 @@ async def safe_fetch_lineage_for_element( if item.get("type") == "table": direct_table_deps.add(item["nodeId"]) + materialization_schedules = await self._fetch_materialization_schedules_for_workbook( + raw_workbook_data["workbookId"] + ) + return SigmaWorkbook( properties=raw_workbook_data, datasets=dataset_deps, direct_table_deps=direct_table_deps, owner_email=None, lineage=lineages, + materialization_schedules=materialization_schedules, ) @cached_method diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma/translator.py b/python_modules/libraries/dagster-sigma/dagster_sigma/translator.py index a949b73f7b6fb..de54d74219115 100644 --- a/python_modules/libraries/dagster-sigma/dagster_sigma/translator.py +++ b/python_modules/libraries/dagster-sigma/dagster_sigma/translator.py @@ -3,7 +3,12 @@ from dagster import AssetKey, AssetSpec, MetadataValue, TableSchema from dagster._annotations import deprecated -from dagster._core.definitions.metadata.metadata_set import TableMetadataSet +from dagster._core.definitions.metadata.metadata_set import NamespacedMetadataSet, TableMetadataSet +from dagster._core.definitions.metadata.metadata_value import ( + JsonMetadataValue, + TimestampMetadataValue, + UrlMetadataValue, +) from dagster._core.definitions.metadata.table import TableColumn from dagster._record import record from dagster._serdes.serdes import whitelist_for_serdes @@ -26,6 +31,20 @@ def _inode_from_url(url: str) -> str: return f'inode-{url.split("/")[-1]}' +class SigmaWorkbookMetadataSet(NamespacedMetadataSet): + web_url: Optional[UrlMetadataValue] + version: Optional[int] + created_at: Optional[TimestampMetadataValue] + properties: Optional[JsonMetadataValue] + lineage: Optional[JsonMetadataValue] + materialization_schedules: Optional[JsonMetadataValue] = None + workbook_id: str + + @classmethod + def namespace(cls) -> str: + return "dagster_sigma" + + @whitelist_for_serdes @record class SigmaWorkbook: @@ -40,6 +59,7 @@ class SigmaWorkbook: datasets: AbstractSet[str] direct_table_deps: AbstractSet[str] owner_email: Optional[str] + materialization_schedules: Optional[List[Dict[str, Any]]] @whitelist_for_serdes @@ -110,13 +130,23 @@ def get_asset_spec(self, data: Union[SigmaDataset, SigmaWorkbook]) -> AssetSpec: """Get the AssetSpec for a Sigma object, such as a workbook or dataset.""" if 
isinstance(data, SigmaWorkbook): metadata = { - "dagster_sigma/web_url": MetadataValue.url(data.properties["url"]), - "dagster_sigma/version": data.properties["latestVersion"], - "dagster_sigma/created_at": MetadataValue.timestamp( - isoparse(data.properties["createdAt"]) + **SigmaWorkbookMetadataSet( + web_url=MetadataValue.url(data.properties["url"]), + version=data.properties["latestVersion"], + created_at=MetadataValue.timestamp(isoparse(data.properties["createdAt"])), + properties=MetadataValue.json(data.properties), + lineage=MetadataValue.json(data.lineage), + workbook_id=data.properties["workbookId"], + **( + { + "materialization_schedules": MetadataValue.json( + data.materialization_schedules + ) + } + if data.materialization_schedules + else {} + ), ), - "dagster_sigma/properties": MetadataValue.json(data.properties), - "dagster_sigma/lineage": MetadataValue.json(data.lineage), } datasets = [self._context.get_datasets_by_inode()[inode] for inode in data.datasets] tables = [ diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma_tests/conftest.py b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/conftest.py index e2d862fe35527..29fa8aae9f778 100644 --- a/python_modules/libraries/dagster-sigma/dagster_sigma_tests/conftest.py +++ b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/conftest.py @@ -105,6 +105,29 @@ def lineage_warn_fixture(responses: aioresponses) -> None: ) +@pytest.fixture(name="sigma_materialization") +def sigma_materialization_fixture(responses: aioresponses) -> None: + # Trigger materialization, check status, check status again + request_responses.add( + method=request_responses.POST, + url=f"{SigmaBaseUrl.AWS_US.value}/v2/workbooks/4ea60fe9-f487-43b0-aa7a-3ef43ca3a90e/materializations", + status=200, + body=json.dumps({"materializationId": "foobar"}), + ) + request_responses.add( + method=request_responses.GET, + url=f"{SigmaBaseUrl.AWS_US.value}/v2/workbooks/4ea60fe9-f487-43b0-aa7a-3ef43ca3a90e/materializations/foobar", + status=200, + body=json.dumps({"materializationId": "foobar", "status": "pending"}), + ) + request_responses.add( + method=request_responses.GET, + url=f"{SigmaBaseUrl.AWS_US.value}/v2/workbooks/4ea60fe9-f487-43b0-aa7a-3ef43ca3a90e/materializations/foobar", + status=200, + body=json.dumps({"materializationId": "foobar", "status": "ready"}), + ) + + @pytest.fixture(name="sigma_sample_data") def sigma_sample_data_fixture(responses: aioresponses) -> None: # Single workbook, dataset @@ -133,6 +156,12 @@ def sigma_sample_data_fixture(responses: aioresponses) -> None: body=json.dumps(_build_paginated_response([{"pageId": "qwMyyHBCuC", "name": "Page 1"}])), status=200, ) + responses.add( + method=hdrs.METH_GET, + url="https://aws-api.sigmacomputing.com/v2/workbooks/4ea60fe9-f487-43b0-aa7a-3ef43ca3a90e/materialization-schedules?limit=1000", + body=json.dumps(_build_paginated_response([{"sheetId": "qwMyyHBCuC"}])), + status=200, + ) elements = [ { "elementId": "_MuHPbskp0", diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma_tests/materialize_workbook.py b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/materialize_workbook.py new file mode 100644 index 0000000000000..c5735a936d0af --- /dev/null +++ b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/materialize_workbook.py @@ -0,0 +1,38 @@ +from dagster import EnvVar, asset, define_asset_job +from dagster._core.definitions.definitions_class import Definitions +from dagster._utils.env import environ +from dagster_sigma import ( + 
SigmaBaseUrl, + SigmaOrganization, + build_materialize_workbook_assets_definition, + load_sigma_asset_specs, +) + +fake_client_id = "fake_client_id" +fake_client_secret = "fake_client_secret" + +with environ({"SIGMA_CLIENT_ID": fake_client_id, "SIGMA_CLIENT_SECRET": fake_client_secret}): + fake_token = "fake_token" + resource = SigmaOrganization( + base_url=SigmaBaseUrl.AWS_US, + client_id=EnvVar("SIGMA_CLIENT_ID"), + client_secret=EnvVar("SIGMA_CLIENT_SECRET"), + ) + + @asset + def my_materializable_asset(): + pass + + sigma_specs = load_sigma_asset_specs(resource) + sigma_assets = [ + build_materialize_workbook_assets_definition("sigma", spec) + if spec.metadata.get("dagster_sigma/materialization_schedules") + else spec + for spec in sigma_specs + ] + + defs = Definitions( + assets=[my_materializable_asset, *sigma_assets], + jobs=[define_asset_job("all_asset_job")], + resources={"sigma": resource}, + ) diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_asset_specs.py b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_asset_specs.py index 47e76cea6497f..c3cc5a383c885 100644 --- a/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_asset_specs.py +++ b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_asset_specs.py @@ -4,6 +4,8 @@ import responses from click.testing import CliRunner from dagster._core.code_pointer import CodePointer +from dagster._core.definitions import materialize +from dagster._core.definitions.asset_key import AssetKey from dagster._core.definitions.reconstruct import ( initialize_repository_def_from_pointer, reconstruct_repository_def_from_pointer, @@ -79,6 +81,27 @@ def test_load_assets_organization_data(sigma_auth_token: str, sigma_sample_data: assert len(responses.calls) == calls +@responses.activate +def test_materialize_workbook( + sigma_auth_token: str, sigma_sample_data: None, sigma_materialization: None +) -> None: + with instance_for_test() as _instance: + # first, we resolve the repository to generate our cached metadata + repository_def = initialize_repository_def_from_pointer( + CodePointer.from_python_file( + str(Path(__file__).parent / "materialize_workbook.py"), "defs", None + ), + ) + + workbook_asset = repository_def.assets_defs_by_key[AssetKey(["Sample_Workbook"])] + assert workbook_asset.is_materializable + + # materialize the workbook + with environ({"SIGMA_CLIENT_ID": "fake", "SIGMA_CLIENT_SECRET": "fake"}): + result = materialize([workbook_asset], raise_on_error=False) + assert result.success + + @responses.activate def test_load_assets_organization_data_translator( sigma_auth_token: str, sigma_sample_data: None diff --git a/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_translator.py b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_translator.py index d847ce0074c9b..a32aa3977518e 100644 --- a/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_translator.py +++ b/python_modules/libraries/dagster-sigma/dagster_sigma_tests/test_translator.py @@ -28,6 +28,7 @@ def test_workbook_translation() -> None: owner_email="ben@dagsterlabs.com", direct_table_deps={SAMPLE_TABLE_INODE}, lineage=[], + materialization_schedules=None, ) sample_dataset = SigmaDataset(properties=SAMPLE_DATASET_DATA, columns=set(), inputs=set()) diff --git a/python_modules/libraries/dagster-tableau/dagster_tableau/assets.py b/python_modules/libraries/dagster-tableau/dagster_tableau/assets.py index 4ab7c99cf038b..55507697ee0a1 100644 --- 
a/python_modules/libraries/dagster-tableau/dagster_tableau/assets.py +++ b/python_modules/libraries/dagster-tableau/dagster_tableau/assets.py @@ -42,7 +42,7 @@ def build_tableau_materializable_assets_definition( def asset_fn(context: AssetExecutionContext): tableau = cast(BaseTableauWorkspace, getattr(context.resources, resource_key)) with tableau.get_client() as client: - yield from client.refresh_and_materialize_workbooks( # pyright: ignore[reportOptionalMemberAccess] + yield from client.refresh_and_materialize_workbooks( specs=specs, refreshable_workbook_ids=refreshable_workbook_ids )
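A minimal usage sketch of the new Sigma workbook materialization support added above (this mirrors the materialize_workbook.py test harness in this diff rather than documenting a settled public workflow; the SIGMA_CLIENT_ID / SIGMA_CLIENT_SECRET environment variables and the "sigma" resource key are assumptions):

    from dagster import Definitions, EnvVar
    from dagster_sigma import (
        SigmaBaseUrl,
        SigmaOrganization,
        build_materialize_workbook_assets_definition,
        load_sigma_asset_specs,
    )

    # Assumed credentials; supply your own Sigma API client ID and secret.
    sigma = SigmaOrganization(
        base_url=SigmaBaseUrl.AWS_US,
        client_id=EnvVar("SIGMA_CLIENT_ID"),
        client_secret=EnvVar("SIGMA_CLIENT_SECRET"),
    )

    sigma_specs = load_sigma_asset_specs(sigma)

    # Workbooks that carry at least one materialization schedule become
    # materializable assets; everything else stays an external asset spec.
    sigma_assets = [
        build_materialize_workbook_assets_definition("sigma", spec)
        if spec.metadata.get("dagster_sigma/materialization_schedules")
        else spec
        for spec in sigma_specs
    ]

    defs = Definitions(assets=sigma_assets, resources={"sigma": sigma})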
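The generated asset is a thin wrapper over the new resource method; below is a sketch of calling SigmaOrganization.run_materializations_for_workbook directly from a custom asset body, assuming the sigma resource and sigma_specs from the previous sketch (the asset name here is hypothetical). Note that the spec must carry the dagster_sigma/workbook_id and dagster_sigma/materialization_schedules metadata that load_sigma_asset_specs attaches:

    from dagster import AssetExecutionContext, multi_asset

    # Pick a workbook spec that has at least one materialization schedule.
    workbook_spec = next(
        spec
        for spec in sigma_specs
        if spec.metadata.get("dagster_sigma/materialization_schedules")
    )

    @multi_asset(specs=[workbook_spec], required_resource_keys={"sigma"})
    def materialize_workbook(context: AssetExecutionContext):
        org = context.resources.sigma
        # Starts every materialization schedule on the workbook, polls until each
        # finishes, and yields one AssetMaterialization for the workbook's key.
        yield from org.run_materializations_for_workbook(workbook_spec)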