diff --git a/docs/docs-next/docs/changelog.md b/docs/docs-next/docs/changelog.md index 21a6e631c830a..8ffc19ef95cc9 100644 --- a/docs/docs-next/docs/changelog.md +++ b/docs/docs-next/docs/changelog.md @@ -2,4 +2,4 @@ title: "Changelog" --- -# Changelog \ No newline at end of file +# Changelog diff --git a/docs/docs-next/docs/concepts/assets/asset-checks.md b/docs/docs-next/docs/concepts/assets/asset-checks.md index 913b2c1a53ede..5efe70f534dad 100644 --- a/docs/docs-next/docs/concepts/assets/asset-checks.md +++ b/docs/docs-next/docs/concepts/assets/asset-checks.md @@ -3,4 +3,4 @@ title: "Asset checks" sidebar_position: 70 --- -# Asset checks \ No newline at end of file +# Asset checks diff --git a/docs/docs-next/docs/concepts/assets/asset-dependencies.md b/docs/docs-next/docs/concepts/assets/asset-dependencies.md index 5c8105f20d7ba..39a211b81243a 100644 --- a/docs/docs-next/docs/concepts/assets/asset-dependencies.md +++ b/docs/docs-next/docs/concepts/assets/asset-dependencies.md @@ -3,4 +3,4 @@ title: "Asset dependencies" sidebar_position: 30 --- -# Asset dependencies \ No newline at end of file +# Asset dependencies diff --git a/docs/docs-next/docs/concepts/assets/asset-materialization.md b/docs/docs-next/docs/concepts/assets/asset-materialization.md index fa5d0043d088f..63da7bdd3b87c 100644 --- a/docs/docs-next/docs/concepts/assets/asset-materialization.md +++ b/docs/docs-next/docs/concepts/assets/asset-materialization.md @@ -3,4 +3,4 @@ title: "Asset materialization" sidebar_position: 20 --- -# Asset materialization \ No newline at end of file +# Asset materialization diff --git a/docs/docs-next/docs/concepts/assets/asset-metadata.md b/docs/docs-next/docs/concepts/assets/asset-metadata.md index 1681bcbf52686..ab6bccf70e303 100644 --- a/docs/docs-next/docs/concepts/assets/asset-metadata.md +++ b/docs/docs-next/docs/concepts/assets/asset-metadata.md @@ -3,4 +3,4 @@ title: "Asset metadata" sidebar_position: 40 --- -# Asset metadata \ No newline at end of file +# 
Asset metadata diff --git a/docs/docs-next/docs/concepts/assets/thinking-in-assets.md b/docs/docs-next/docs/concepts/assets/thinking-in-assets.md index 78675fba1317b..0079ea32a280b 100644 --- a/docs/docs-next/docs/concepts/assets/thinking-in-assets.md +++ b/docs/docs-next/docs/concepts/assets/thinking-in-assets.md @@ -3,4 +3,4 @@ title: "Thinking in assets" sidebar_position: 10 --- -# Thinking in assets \ No newline at end of file +# Thinking in assets diff --git a/docs/docs-next/docs/concepts/execution.md b/docs/docs-next/docs/concepts/execution.md index b0301fd3fcea7..a2e753b626cec 100644 --- a/docs/docs-next/docs/concepts/execution.md +++ b/docs/docs-next/docs/concepts/execution.md @@ -1 +1 @@ -# Execution \ No newline at end of file +# Execution diff --git a/docs/docs-next/docs/concepts/ops-jobs.md b/docs/docs-next/docs/concepts/ops-jobs.md index 9dafc7edc1e32..0a7482315eea0 100644 --- a/docs/docs-next/docs/concepts/ops-jobs.md +++ b/docs/docs-next/docs/concepts/ops-jobs.md @@ -1 +1 @@ -# Ops and jobs \ No newline at end of file +# Ops and jobs diff --git a/docs/docs-next/docs/concepts/partitions.md b/docs/docs-next/docs/concepts/partitions.md index e13a76656017f..536fea24a6a06 100644 --- a/docs/docs-next/docs/concepts/partitions.md +++ b/docs/docs-next/docs/concepts/partitions.md @@ -2,4 +2,4 @@ title: "Partitions" --- -# Partitions \ No newline at end of file +# Partitions diff --git a/docs/docs-next/docs/concepts/resources.md b/docs/docs-next/docs/concepts/resources.md index 54fed86a065a5..3c1229ee1f13b 100644 --- a/docs/docs-next/docs/concepts/resources.md +++ b/docs/docs-next/docs/concepts/resources.md @@ -1 +1 @@ -# Resources \ No newline at end of file +# Resources diff --git a/docs/docs-next/docs/concepts/understanding-assets.md b/docs/docs-next/docs/concepts/understanding-assets.md index c235cf7d4ad59..adafa34768ace 100644 --- a/docs/docs-next/docs/concepts/understanding-assets.md +++ b/docs/docs-next/docs/concepts/understanding-assets.md @@ -1,9 +1,7 
@@ --- title: Understanding Assets description: Understanding the concept of assets in Dagster -last_update: - date: 2024-08-11 - author: Pedram Navid +last_update: + date: 2024-08-11 + author: Pedram Navid --- - - diff --git a/docs/docs-next/docs/dagster-plus.md b/docs/docs-next/docs/dagster-plus.md index ec6b77e5e3bb3..61c264492f0eb 100644 --- a/docs/docs-next/docs/dagster-plus.md +++ b/docs/docs-next/docs/dagster-plus.md @@ -3,4 +3,4 @@ title: "Dagster+" displayed_sidebar: "dagsterPlus" --- -# Dagster+ \ No newline at end of file +# Dagster+ diff --git a/docs/docs-next/docs/guides.md b/docs/docs-next/docs/guides.md index 5b279db00c020..a4ccf7ac9573a 100644 --- a/docs/docs-next/docs/guides.md +++ b/docs/docs-next/docs/guides.md @@ -2,4 +2,4 @@ title: "Guides" --- -# Guides \ No newline at end of file +# Guides diff --git a/docs/docs-next/docs/guides/automation.md b/docs/docs-next/docs/guides/automation.md index 43a33ac23c798..915eb4e32afe1 100644 --- a/docs/docs-next/docs/guides/automation.md +++ b/docs/docs-next/docs/guides/automation.md @@ -1,19 +1,17 @@ --- title: "Automating Pipelines" description: Learn how to automate your data pipelines. -last_update: - date: 2024-08-12 - author: Pedram Navid +last_update: + date: 2024-08-12 + author: Pedram Navid --- -Automation is key to building reliable, efficient data pipelines. -This guide provides a simplified overview of the main ways to automate processes in Dagster, -helping you choose the right method for your needs. You will find links to more detailed guides for each method below. +Automation is key to building reliable, efficient data pipelines. This guide provides a simplified overview of the main ways to automate processes in Dagster, helping you choose the right method for your needs. You will find links to more detailed guides for each method below. 
-## What You'll Learn +## What you'll learn - The different automation options available in Dagster -- How to implement basic scheduling and event-based triggers +- How to implement basic scheduling and event-based triggers - Best practices for selecting and using automation methods
@@ -26,7 +24,7 @@ Before continuing, you should be familiar with:
-## Automation Methods Overview +## Automation methods overview Dagster offers several ways to automate pipeline execution: @@ -34,9 +32,7 @@ Dagster offers several ways to automate pipeline execution: 2. [Sensors](#sensors) - Trigger runs based on events 3. [Asset Sensors](#asset-sensors) - Trigger jobs when specific assets materialize -Let's look at each method in more detail. - -## Schedules +## Schedules Schedules allow you to run jobs at specified times, like "every Monday at 9 AM" or "daily at midnight." A schedule combines a selection of assets, known as a [Job](/concepts/ops-jobs), and a [cron expression](https://en.wikipedia.org/wiki/Cron) @@ -44,14 +40,14 @@ in order to define when the job should be run. To make creating cron expressions easier, you can use an online tool like [Crontab Guru](https://crontab.guru/). -### When to use Schedules +### When to use schedules - You need to run jobs at regular intervals - You want basic time-based automation -For examples of how to create schedules, see the [How-To Use Schedules](/guides/automation/schedules) guide. +For examples of how to create schedules, see [How-To Use Schedules](/guides/automation/schedules). -For more information about how Schedules work, see the [About Schedules](/concepts/schedules) concept page. +For more information about how Schedules work, see [About Schedules](/concepts/schedules). ## Sensors @@ -72,7 +68,7 @@ For more examples of how to create sensors, see the [How-To Use Sensors](/guides For more information about how Sensors work, see the [About Sensors](/concepts/sensors) concept page. -## Asset Sensors +## Asset sensors Asset Sensors trigger jobs when specified assets are materialized, allowing you to create dependencies between jobs or code locations. 
diff --git a/docs/docs-next/docs/guides/data-assets/adding-metadata-to-assets.md b/docs/docs-next/docs/guides/data-assets/adding-metadata-to-assets.md index 42c5929899538..5c288165c7970 100644 --- a/docs/docs-next/docs/guides/data-assets/adding-metadata-to-assets.md +++ b/docs/docs-next/docs/guides/data-assets/adding-metadata-to-assets.md @@ -3,5 +3,3 @@ title: "Adding metadata to assets" sidebar_position: 40 sidebar_label: "Adding metadata" --- - -# Adding metadata to assets \ No newline at end of file diff --git a/docs/docs-next/docs/guides/data-assets/creating-asset-factories.md b/docs/docs-next/docs/guides/data-assets/creating-asset-factories.md index b0fb742c34927..07e558b2674dc 100644 --- a/docs/docs-next/docs/guides/data-assets/creating-asset-factories.md +++ b/docs/docs-next/docs/guides/data-assets/creating-asset-factories.md @@ -3,5 +3,3 @@ title: "Creating asset factories" sidebar_position: 50 sidebar_label: "Creating asset factories" --- - -# Creating asset factories \ No newline at end of file diff --git a/docs/docs-next/docs/guides/data-assets/creating-data-assets.md b/docs/docs-next/docs/guides/data-assets/creating-data-assets.md index 6c5442dbef60c..12ede891cdcc4 100644 --- a/docs/docs-next/docs/guides/data-assets/creating-data-assets.md +++ b/docs/docs-next/docs/guides/data-assets/creating-data-assets.md @@ -3,5 +3,3 @@ title: "Creating data assets" sidebar_position: 10 sidebar_label: "Creating data assets" --- - -# Creating data assets \ No newline at end of file diff --git a/docs/docs-next/docs/guides/data-assets/creating-dependencies-between-assets.md b/docs/docs-next/docs/guides/data-assets/creating-dependencies-between-assets.md index c9ac2f5b55efe..ae22e4531cc0c 100644 --- a/docs/docs-next/docs/guides/data-assets/creating-dependencies-between-assets.md +++ b/docs/docs-next/docs/guides/data-assets/creating-dependencies-between-assets.md @@ -3,5 +3,3 @@ title: "Creating dependencies between assets" sidebar_position: 20 sidebar_label: "Creating 
asset dependencies" --- - -# Creating dependencies between assets \ No newline at end of file diff --git a/docs/docs-next/docs/guides/data-assets/passing-data-between-assets.md b/docs/docs-next/docs/guides/data-assets/passing-data-between-assets.md index 304dfe720eef0..7a5dffeca8875 100644 --- a/docs/docs-next/docs/guides/data-assets/passing-data-between-assets.md +++ b/docs/docs-next/docs/guides/data-assets/passing-data-between-assets.md @@ -3,9 +3,9 @@ title: How to Pass Data Between Assets description: Learn how to pass data between assets in Dagster sidebar_position: 30 sidebar_label: "Passing data between assets" -last_update: - date: 2024-08-11 - author: Pedram Navid +last_update: + date: 2024-08-11 + author: Pedram Navid --- As you develop your data pipeline, you'll likely need to pass data between assets. By the end of this guide, you'll have a solid understanding of the different approaches to passing data between assets and when to use each one. @@ -25,7 +25,7 @@ To follow the steps in this guide, you'll need: ## Overview -In Dagster, assets are the building blocks of your data pipeline and it's common to want to pass data between them. This guide will help you understand how to pass data between assets. +In Dagster, assets are the building blocks of your data pipeline and it's common to want to pass data between them. This guide will help you understand how to pass data between assets. There are three ways of passing data between assets: @@ -46,18 +46,21 @@ A common and recommended approach to passing data between assets is explicitly m In this example, the first asset opens a connection to the SQLite database and writes data to it. The second asset opens a connection to the same database and reads data from it. The dependency between the first asset and the second asset is made explicit through the asset's `deps` argument. 
The benefits of this approach are: + - It's explicit and easy to understand how data is stored and retrieved - You have maximum flexibility in terms of how and where data is stored, for example, based on environment The downsides of this approach are: + - You need to manage connections and transactions manually - You need to handle errors and edge cases, for example, if the database is down or if a connection is closed ## Move Data Between Assets Implicitly Using IO Managers -Dagster's IO Managers are a powerful feature that manages data between assets by defining how data is read from and written to external storage. They help separate business logic from I/O operations, reducing boilerplate code and making it easier to change where data is stored. +Dagster's IO Managers are a powerful feature that manages data between assets by defining how data is read from and written to external storage. They help separate business logic from I/O operations, reducing boilerplate code and making it easier to change where data is stored. I/O managers handle: + 1. **Input**: Reading data from storage and loading it into memory for use by dependent assets. 2. **Output**: Writing data to the configured storage location. @@ -74,14 +77,16 @@ each step would execute in a separate environment and would not have access to t ::: -The `people()` and `birds()` assets both write their dataframes to DuckDB +The `people()` and `birds()` assets both write their dataframes to DuckDB for persistent storage. The `combined_data()` asset requests data from both assets by adding them as parameters to the function, and the IO Manager handles the reading them from DuckDB and making them available to the `combined_data` function as dataframes. Note that when you use IO Managers you do not need to manually add the asset's dependencies through the `deps` argument. 
The benefits of this approach are: + - The reading and writing of data is handled by the IO Manager, reducing boilerplate code - It's easy to swap out different IO Managers based on environments without changing the underlying asset computation The downsides of this approach are: + - The IO Manager approach is less flexible should you need to customize how data is read or written to storage - Some decisions may be made by the IO Manager for you, such as naming conventions that can be hard to override. @@ -94,7 +99,7 @@ Consider this example: -This example downloads a zip file from Google Drive, unzips it, and loads the data into a pandas DataFrame. It relies on each asset running on the same file system to perform these operations. +This example downloads a zip file from Google Drive, unzips it, and loads the data into a pandas DataFrame. It relies on each asset running on the same file system to perform these operations. The assets are modeled as tasks, rather than as data assets. For more information on the difference between tasks and data assets, check out the [Thinking in Assets](/concepts/assets/thinking-in-assets) guide. @@ -107,18 +112,18 @@ instead within a single asset. This pipeline still assumes enough disk and memory available to handle the data, but for smaller datasets, it can work well. The benefits of this approach are: + - All the computation that defines how an asset is created is contained within a single asset, making it easier to understand and maintain - It can be faster than relying on external storage, and doesn't require the overhead of setting up additional compute instances. - The downsides of this approach are: + - It makes certain assumptions about how much data is being processed - It can be difficult to reuse functions across assets, since they're tightly coupled to the data they produce - It may not always be possible to swap functionality based on the environment you are running in. 
For example, if you are running in a cloud environment, you may not have access to the local file system. - --- ## Related Resources -TODO: add links to relevant API documentation here. \ No newline at end of file +TODO: add links to relevant API documentation here. diff --git a/docs/docs-next/docs/guides/data-assets/selecting-subsets-of-assets.md b/docs/docs-next/docs/guides/data-assets/selecting-subsets-of-assets.md index 0ab522b4a9628..0dee09365534c 100644 --- a/docs/docs-next/docs/guides/data-assets/selecting-subsets-of-assets.md +++ b/docs/docs-next/docs/guides/data-assets/selecting-subsets-of-assets.md @@ -3,5 +3,3 @@ title: "Selecting subsets of assets" sidebar_position: 60 sidebar_label: "Selecting assets" --- - -# Selecting subsets of assets \ No newline at end of file diff --git a/docs/docs-next/docs/guides/deployment.md b/docs/docs-next/docs/guides/deployment.md index 8d63acc98f040..02b64f71fe925 100644 --- a/docs/docs-next/docs/guides/deployment.md +++ b/docs/docs-next/docs/guides/deployment.md @@ -2,4 +2,4 @@ title: "Deployment" --- -# Deployment \ No newline at end of file +# Deployment diff --git a/docs/docs-next/docs/guides/deployment/aws.md b/docs/docs-next/docs/guides/deployment/aws.md index 98224df2dc07f..68645a9d51d5f 100644 --- a/docs/docs-next/docs/guides/deployment/aws.md +++ b/docs/docs-next/docs/guides/deployment/aws.md @@ -3,4 +3,4 @@ title: "Deploying to Amazon Web Services" sidebar_position: 1 --- -# Deploying to Amazon Web Services \ No newline at end of file +# Deploying to Amazon Web Services diff --git a/docs/docs-next/docs/guides/deployment/azure.md b/docs/docs-next/docs/guides/deployment/azure.md index cede803ff1987..8cfa39b06904c 100644 --- a/docs/docs-next/docs/guides/deployment/azure.md +++ b/docs/docs-next/docs/guides/deployment/azure.md @@ -3,4 +3,4 @@ title: "Deploying to Microsoft Azure" sidebar_position: 3 --- -# Deploying to Microsoft Azure \ No newline at end of file +# Deploying to Microsoft Azure diff --git 
a/docs/docs-next/docs/guides/deployment/building-a-data-mesh.md b/docs/docs-next/docs/guides/deployment/building-a-data-mesh.md index 67b9db5c2e35a..a4104a5d32b5c 100644 --- a/docs/docs-next/docs/guides/deployment/building-a-data-mesh.md +++ b/docs/docs-next/docs/guides/deployment/building-a-data-mesh.md @@ -3,4 +3,4 @@ title: "Building a data mesh" sidebar_position: 6 --- -# Building a data mesh \ No newline at end of file +# Building a data mesh diff --git a/docs/docs-next/docs/guides/deployment/dagster-plus.md b/docs/docs-next/docs/guides/deployment/dagster-plus.md index 75dd22977284c..3f6ba6a78e8a5 100644 --- a/docs/docs-next/docs/guides/deployment/dagster-plus.md +++ b/docs/docs-next/docs/guides/deployment/dagster-plus.md @@ -3,4 +3,4 @@ title: "Deploying to Dagster+" sidebar_position: 4 --- -# Deploying to Dagster+ \ No newline at end of file +# Deploying to Dagster+ diff --git a/docs/docs-next/docs/guides/deployment/gcp.md b/docs/docs-next/docs/guides/deployment/gcp.md index 4a87508a3a4e0..90d787f06fe1c 100644 --- a/docs/docs-next/docs/guides/deployment/gcp.md +++ b/docs/docs-next/docs/guides/deployment/gcp.md @@ -3,4 +3,4 @@ title: "Deploying to Google Cloud Platform" sidebar_position: 2 --- -# Deploying to Google Cloud Platform \ No newline at end of file +# Deploying to Google Cloud Platform diff --git a/docs/docs-next/docs/guides/deployment/managing-code-locations.md b/docs/docs-next/docs/guides/deployment/managing-code-locations.md index 14891420283fd..0d4ce12fba399 100644 --- a/docs/docs-next/docs/guides/deployment/managing-code-locations.md +++ b/docs/docs-next/docs/guides/deployment/managing-code-locations.md @@ -3,4 +3,4 @@ title: "Managing code locations" sidebar_position: 5 --- -# Managing code locations \ No newline at end of file +# Managing code locations diff --git a/docs/docs-next/docs/guides/deployment/self-hosted-to-dagster-plus.md b/docs/docs-next/docs/guides/deployment/self-hosted-to-dagster-plus.md index 2fc360edc98f2..f72d198bd7ca2 
100644 --- a/docs/docs-next/docs/guides/deployment/self-hosted-to-dagster-plus.md +++ b/docs/docs-next/docs/guides/deployment/self-hosted-to-dagster-plus.md @@ -3,4 +3,4 @@ title: "Migrating from self-hosted to Dagster+" sidebar_position: 7 --- -# Migrating from self-hosted to Dagster+ \ No newline at end of file +# Migrating from self-hosted to Dagster+ diff --git a/docs/docs-next/docs/guides/external-systems.md b/docs/docs-next/docs/guides/external-systems.md index 7400d41916895..cef8e5551d86a 100644 --- a/docs/docs-next/docs/guides/external-systems.md +++ b/docs/docs-next/docs/guides/external-systems.md @@ -2,4 +2,4 @@ title: "External systems" --- -# Data assets \ No newline at end of file +# External systems diff --git a/docs/docs-next/docs/guides/external-systems/adding-python-libraries.md b/docs/docs-next/docs/guides/external-systems/adding-python-libraries.md index 3519c31b8997d..0b6055226ae0d 100644 --- a/docs/docs-next/docs/guides/external-systems/adding-python-libraries.md +++ b/docs/docs-next/docs/guides/external-systems/adding-python-libraries.md @@ -2,5 +2,3 @@ title: "Adding Python libraries" sidebar_position: 3 --- - -# Adding Python libraries \ No newline at end of file diff --git a/docs/docs-next/docs/guides/external-systems/connecting-databases.md b/docs/docs-next/docs/guides/external-systems/connecting-databases.md index acb0431aa503f..ef99d64489127 100644 --- a/docs/docs-next/docs/guides/external-systems/connecting-databases.md +++ b/docs/docs-next/docs/guides/external-systems/connecting-databases.md @@ -2,5 +2,3 @@ title: "Connecting databases" sidebar_position: 1 --- - -# Connecting databases \ No newline at end of file diff --git a/docs/docs-next/docs/guides/external-systems/using-api-connections.md b/docs/docs-next/docs/guides/external-systems/using-api-connections.md index e08505af2c9e4..3bde43668236f 100644 --- a/docs/docs-next/docs/guides/external-systems/using-api-connections.md +++
b/docs/docs-next/docs/guides/external-systems/using-api-connections.md @@ -3,4 +3,4 @@ title: "Using API connections" sidebar_position: 2 --- -# Using API connections \ No newline at end of file +# Using API connections diff --git a/docs/docs-next/docs/guides/monitoring.md b/docs/docs-next/docs/guides/monitoring.md index 57d4724ca8679..7997bf8f7d8c3 100644 --- a/docs/docs-next/docs/guides/monitoring.md +++ b/docs/docs-next/docs/guides/monitoring.md @@ -2,4 +2,4 @@ title: "Monitoring" --- -# Monitoring \ No newline at end of file +# Monitoring diff --git a/docs/docs-next/docs/guides/monitoring/custom-logging.md b/docs/docs-next/docs/guides/monitoring/custom-logging.md index c44714bea8047..a258048687d6d 100644 --- a/docs/docs-next/docs/guides/monitoring/custom-logging.md +++ b/docs/docs-next/docs/guides/monitoring/custom-logging.md @@ -3,4 +3,4 @@ title: "Setting up custom logging" sidebar_position: 1 --- -# Setting up custom logging \ No newline at end of file +# Setting up custom logging diff --git a/docs/docs-next/docs/guides/monitoring/custom-metrics.md b/docs/docs-next/docs/guides/monitoring/custom-metrics.md index 7752006d45cb8..ce124410b2bab 100644 --- a/docs/docs-next/docs/guides/monitoring/custom-metrics.md +++ b/docs/docs-next/docs/guides/monitoring/custom-metrics.md @@ -3,4 +3,4 @@ title: "Using custom metrics in logs" sidebar_position: 3 --- -# Using custom metrics in logs \ No newline at end of file +# Using custom metrics in logs diff --git a/docs/docs-next/docs/guides/monitoring/failed-run-alerts.md b/docs/docs-next/docs/guides/monitoring/failed-run-alerts.md index 0047e8a22b6fe..cd68c525a48e0 100644 --- a/docs/docs-next/docs/guides/monitoring/failed-run-alerts.md +++ b/docs/docs-next/docs/guides/monitoring/failed-run-alerts.md @@ -3,4 +3,4 @@ title: "Alerting on failed runs" sidebar_position: 1 --- -# Alerting on failed runs \ No newline at end of file +# Alerting on failed runs diff --git a/docs/docs-next/docs/guides/testing.md 
b/docs/docs-next/docs/guides/testing.md index 3aca5a9263da1..0c346fe97ee1f 100644 --- a/docs/docs-next/docs/guides/testing.md +++ b/docs/docs-next/docs/guides/testing.md @@ -1,5 +1,3 @@ --- title: "Testing" --- - -# Testing \ No newline at end of file diff --git a/docs/docs-next/docs/guides/testing/detecting-schema-changes.md b/docs/docs-next/docs/guides/testing/detecting-schema-changes.md index 69ea31491d141..796b64fe96832 100644 --- a/docs/docs-next/docs/guides/testing/detecting-schema-changes.md +++ b/docs/docs-next/docs/guides/testing/detecting-schema-changes.md @@ -2,5 +2,3 @@ title: "Detecting schema changes" sidebar_position: 4 --- - -# Detecting schema changes \ No newline at end of file diff --git a/docs/docs-next/docs/guides/testing/integration-tests.md b/docs/docs-next/docs/guides/testing/integration-tests.md index f577a725c42d9..748231d1f88f0 100644 --- a/docs/docs-next/docs/guides/testing/integration-tests.md +++ b/docs/docs-next/docs/guides/testing/integration-tests.md @@ -3,4 +3,4 @@ title: "Integration tests" sidebar_position: 3 --- -# Integration tests \ No newline at end of file +# Integration tests diff --git a/docs/docs-next/docs/guides/testing/stopping-runs.md b/docs/docs-next/docs/guides/testing/stopping-runs.md index 2bc64d10711e9..55ff00b26b315 100644 --- a/docs/docs-next/docs/guides/testing/stopping-runs.md +++ b/docs/docs-next/docs/guides/testing/stopping-runs.md @@ -3,4 +3,4 @@ title: "Stopping runs" sidebar_position: 5 --- -# Stopping runs \ No newline at end of file +# Stopping runs diff --git a/docs/docs-next/docs/guides/testing/testing-assets-with-asset-checks.md b/docs/docs-next/docs/guides/testing/testing-assets-with-asset-checks.md index d9736f5a5a778..5a39a8fac337e 100644 --- a/docs/docs-next/docs/guides/testing/testing-assets-with-asset-checks.md +++ b/docs/docs-next/docs/guides/testing/testing-assets-with-asset-checks.md @@ -3,4 +3,4 @@ title: "Testing assets with Asset Checks" sidebar_position: 1 --- -# Testing assets with 
Asset Checks \ No newline at end of file +# Testing assets with Asset Checks diff --git a/docs/docs-next/docs/guides/testing/testing-for-data-freshness.md b/docs/docs-next/docs/guides/testing/testing-for-data-freshness.md index f8b10723f1585..c6544930aacef 100644 --- a/docs/docs-next/docs/guides/testing/testing-for-data-freshness.md +++ b/docs/docs-next/docs/guides/testing/testing-for-data-freshness.md @@ -3,4 +3,4 @@ title: "Testing for data freshness" sidebar_position: 1 --- -# Testing for data freshness \ No newline at end of file +# Testing for data freshness diff --git a/docs/docs-next/docs/guides/testing/unit-tests-for-assets-and-ops.md b/docs/docs-next/docs/guides/testing/unit-tests-for-assets-and-ops.md index a7a9c3c1b0e51..fc112bc6824e0 100644 --- a/docs/docs-next/docs/guides/testing/unit-tests-for-assets-and-ops.md +++ b/docs/docs-next/docs/guides/testing/unit-tests-for-assets-and-ops.md @@ -3,4 +3,4 @@ title: "Unit tests for assets and ops" sidebar_position: 2 --- -# Unit tests for assets and ops \ No newline at end of file +# Unit tests for assets and ops diff --git a/docs/docs-next/docs/guides/transformation.md b/docs/docs-next/docs/guides/transformation.md index 0383a7f5eecb0..eb5d1dd3b9edd 100644 --- a/docs/docs-next/docs/guides/transformation.md +++ b/docs/docs-next/docs/guides/transformation.md @@ -2,4 +2,4 @@ title: "Transformation" --- -# Transformation \ No newline at end of file +# Transformation diff --git a/docs/docs-next/docs/guides/transformation/pushing-operations-to-data-warehouses.md b/docs/docs-next/docs/guides/transformation/pushing-operations-to-data-warehouses.md index b81b2e20b312d..343d4f97fe9c6 100644 --- a/docs/docs-next/docs/guides/transformation/pushing-operations-to-data-warehouses.md +++ b/docs/docs-next/docs/guides/transformation/pushing-operations-to-data-warehouses.md @@ -2,5 +2,3 @@ title: "Pushing operations to data warehouses" sidebar_position: 2 --- - -# Pushing operations to data warehouses \ No newline at end of file 
diff --git a/docs/docs-next/docs/guides/transformation/working-with-different-data-formats.md b/docs/docs-next/docs/guides/transformation/working-with-different-data-formats.md index 854a6f32ab193..8d27bb745ebe7 100644 --- a/docs/docs-next/docs/guides/transformation/working-with-different-data-formats.md +++ b/docs/docs-next/docs/guides/transformation/working-with-different-data-formats.md @@ -2,5 +2,3 @@ title: "Working with different data file formats" sidebar_position: 3 --- - -# Working with different data file formats \ No newline at end of file diff --git a/docs/docs-next/docs/guides/transformation/working-with-large-datasets.md b/docs/docs-next/docs/guides/transformation/working-with-large-datasets.md index a731a28604358..2410d094b79aa 100644 --- a/docs/docs-next/docs/guides/transformation/working-with-large-datasets.md +++ b/docs/docs-next/docs/guides/transformation/working-with-large-datasets.md @@ -2,5 +2,3 @@ title: "Working with large datasets" sidebar_position: 1 --- - -# Working with large datasets \ No newline at end of file diff --git a/docs/docs-next/docs/integrations.md b/docs/docs-next/docs/integrations.md index c112b1f4063ed..93781a0437102 100644 --- a/docs/docs-next/docs/integrations.md +++ b/docs/docs-next/docs/integrations.md @@ -2,4 +2,4 @@ title: "Integrations" --- -# Integrations \ No newline at end of file +# Integrations diff --git a/docs/docs-next/docs/intro.md b/docs/docs-next/docs/intro.md index cbfb65747693e..d446615346b9c 100644 --- a/docs/docs-next/docs/intro.md +++ b/docs/docs-next/docs/intro.md @@ -3,6 +3,6 @@ title: Welcome slug: / --- -# Dagster Documentation +# Dagster documentation -Welcome to Dagster's documentation. \ No newline at end of file +Welcome to Dagster's documentation. 
diff --git a/docs/docs-next/docs/partials/_InspirationList.md b/docs/docs-next/docs/partials/_InspirationList.md index ba76d609c3c9a..0a1ae1cdd5707 100644 --- a/docs/docs-next/docs/partials/_InspirationList.md +++ b/docs/docs-next/docs/partials/_InspirationList.md @@ -4,4 +4,4 @@ If you're looking for additional inspiration, we recommend: - [**Dagster Open Platform**](https://github.com/dagster-io/dagster-open-platform), which is Dagster Lab's open-source data platform. This full-sized project contains real assets and other Dagster features used by the Dagster Labs team. - [**GitHub Discussions**](https://github.com/dagster-io/dagster/discussions), where you can ask questions and get inspired by the Dagster community -- [**The Awesome Dagster repository**](https://github.com/dagster-io/awesome-dagster), which is a collection of all awesome things related to Dagster, including other users' projects, talks, articles, and more \ No newline at end of file +- [**The Awesome Dagster repository**](https://github.com/dagster-io/awesome-dagster), which is a collection of all awesome things related to Dagster, including other users' projects, talks, articles, and more diff --git a/docs/docs-next/docs/tutorial/installation.md b/docs/docs-next/docs/tutorial/installation.md index 2fa4bc9d81ce4..939a9abe405b1 100644 --- a/docs/docs-next/docs/tutorial/installation.md +++ b/docs/docs-next/docs/tutorial/installation.md @@ -7,7 +7,6 @@ description: "Learn how to install Dagster" This guide will walk you through the steps to install Dagster, a data orchestrator for machine learning, analytics, and ETL. Follow the instructions below to get started with Dagster on your local machine. -
Prerequisites @@ -22,7 +21,7 @@ Before you begin, ensure you have the following prerequisites installed on your After installing Python, it's a good idea to setup a virtual environment to isolate your Dagster project from the rest of your system. -There are many ways to setup a virtual environment. One method that requires no +There are many ways to set up a virtual environment. One method that requires no additional dependencies is to use `venv`. ```bash @@ -42,7 +41,6 @@ pip install dagster dagster-webserver This command will install the core Dagster library and the webserver, which is used to serve the Dagster UI. - ## Verify Installation To verify that Dagster is installed correctly, you can run the following command: @@ -70,7 +68,7 @@ pip install 'dagster==1.7.6' ## Conclusion -Congratulations! You have successfully installed Dagster +Congratulations! You have successfully installed Dagster. ## Troubleshooting @@ -81,4 +79,4 @@ If you encounter any issues during the installation process, please refer to the - [Quickstart Tutorial](/tutorial/quick-start) - [ETL Tutorial](/tutorial/tutorial-etl) - [Create a new Dagster project](/tutorial/create-new-project) -- [Creating Data Assets](/guides/data-assets/creating-data-assets) \ No newline at end of file +- [Creating Data Assets](/guides/data-assets/creating-data-assets) diff --git a/docs/docs-next/docs/tutorial/introduction.md b/docs/docs-next/docs/tutorial/introduction.md index 1c573ca873646..2ffb16cb63dfb 100644 --- a/docs/docs-next/docs/tutorial/introduction.md +++ b/docs/docs-next/docs/tutorial/introduction.md @@ -1,6 +1,6 @@ --- -title: 'Introduction' -description: 'Welcome to the Dagster documentation! If this is your first time developing a Dagster pipeline, read through this Getting Started section to get familiar with the basics. Otherwise, feel free to explore our guides and API documentation!' +title: "Introduction" +description: "Welcome to the Dagster documentation!
If this is your first time developing a Dagster pipeline, read through this Getting Started section to get familiar with the basics. Otherwise, feel free to explore our guides and API documentation!" slug: introduction hide_title: false --- diff --git a/docs/docs-next/docs/tutorial/quick-start.md b/docs/docs-next/docs/tutorial/quick-start.md index f1edcb273879f..01fc3f32927c4 100644 --- a/docs/docs-next/docs/tutorial/quick-start.md +++ b/docs/docs-next/docs/tutorial/quick-start.md @@ -1,9 +1,9 @@ --- title: Quickstart description: Learn how to quickly get up and running with Dagster -last_update: - date: 2024-08-10 - author: Pedram Navid +last_update: + date: 2024-08-10 + author: Pedram Navid --- # Dagster Tutorial: Building Your First Dagster Project @@ -36,7 +36,7 @@ First, set up a new Dagster project. ```bash title="Create a virtual environment" python -m venv venv - source venv/bin/activate + source venv/bin/activate # On Windows, use `venv\Scripts\activate` ``` @@ -52,9 +52,9 @@ Set up a basic project structure: :::warning -The file structure here is simplified to get quickly started. +The file structure here is simplified to get quickly started. -Once you've completed this tutorial, consider the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn +Once you've completed this tutorial, consider the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn how to build more complex pipelines with best practices. ::: @@ -75,8 +75,6 @@ how to build more complex pipelines with best practices. touch quickstart/__init__.py quickstart/assets.py touch data/sample_data.csv ``` - - 2. Create a sample CSV file as a data source. 
In the `data/sample_data.csv` file, add the following content: @@ -107,17 +105,18 @@ defs = Definitions(assets=[processed_data]) ``` This code defines a single data asset within a single computation that performs three steps: + - Reads data from the CSV file - Adds an `age_group` column based on the `age` - Saves the processed data to a CSV file -If you are used to task-based orchestrations, this might feel a bit different. +If you are used to task-based orchestrations, this might feel a bit different. In traditional task-based orchestrations, you would have three separate steps, but in Dagster, you model your pipelines using assets as the fundamental building block, rather than tasks. -The `Definitions` object serves as the central configuration point for a Dagster project. In this code, a `Definitions` -object is defined and the asset is passed to it. This tells Dagster about the assets that make up the ETL pipeline +The `Definitions` object serves as the central configuration point for a Dagster project. In this code, a `Definitions` +object is defined and the asset is passed to it. This tells Dagster about the assets that make up the ETL pipeline and allows Dagster to manage their execution and dependencies. ## Step 4: Run Your Pipeline @@ -136,14 +135,14 @@ There should be screenshots here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 2. Open your web browser and go to `http://localhost:3000` -3. You should see the Dagster UI along with the asset. +3. You should see the Dagster UI along with the asset. -3. Click Materialize All to run the pipeline. +4. Click Materialize All to run the pipeline. -4. In the popup that appears, click View to view a run as it executes. +5. In the popup that appears, click View to view a run as it executes. -5. Watch as Dagster executes your pipeline. Try different views by selecting the different view buttons in the top-left. -You can click on each asset to see its logs and metadata. +6. Watch as Dagster executes your pipeline. 
Try different views by selecting the different view buttons in the top-left. + You can click on each asset to see its logs and metadata. ## Step 5: Verify Your Results @@ -168,4 +167,4 @@ Congratulations! You've just built and run your first pipeline with Dagster. You ## Next Steps - Continue with the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn how to build a more complex ETL pipeline -- Learn how to [Think in Assets](/concepts/assets/thinking-in-assets) \ No newline at end of file +- Learn how to [Think in Assets](/concepts/assets/thinking-in-assets) diff --git a/docs/docs-next/docs/tutorial/tutorial-etl.md b/docs/docs-next/docs/tutorial/tutorial-etl.md index 259e4621e2092..d5c6ff74e23fa 100644 --- a/docs/docs-next/docs/tutorial/tutorial-etl.md +++ b/docs/docs-next/docs/tutorial/tutorial-etl.md @@ -1,17 +1,17 @@ --- title: Building an ETL Pipeline description: Learn how to build an ETL pipeline with Dagster -last_update: - date: 2024-08-10 - author: Pedram Navid +last_update: + date: 2024-08-10 + author: Pedram Navid --- -# Dagster Tutorial: Building Your First ETL Pipeline +# Dagster tutorial: Building your first ETL pipeline Welcome to this hands-on tutorial where you'll learn how to build an ETL pipeline with Dagster while exploring key parts of Dagster. -If you haven't already, please complete the [Quick Start](/tutorial/quick-start) tutorial to get familiar with Dagster. +If you haven't already, complete the [Quick Start](/tutorial/quick-start) tutorial to get familiar with Dagster. 
-## What You'll Learn +## What you'll learn - Setting up a Dagster project with the recommended project structure - Creating Assets and using Resources to connect to external systems @@ -20,11 +20,7 @@ If you haven't already, please complete the [Quick Start](/tutorial/quick-start) - Running a pipeline by materializing assets - Adding schedules, sensors, and partitions to your assets -## Prerequisites - -- - -## Step 1: Set Up Your Dagster Environment +## Step 1: Set up your Dagster environment First, set up a new Dagster project. @@ -39,7 +35,7 @@ First, set up a new Dagster project. ```bash title="Create a virtual environment" python -m venv venv - source venv/bin/activate + source venv/bin/activate # On Windows, use `venv\Scripts\activate` ``` @@ -49,7 +45,7 @@ First, set up a new Dagster project. pip install dagster dagster-webserver pandas ``` -## What You've Learned +## What you've learned Congratulations! You've just built and run your first ETL pipeline with Dagster. You've learned how to: @@ -57,9 +53,10 @@ Congratulations! You've just built and run your first ETL pipeline with Dagster. 
- Define Software-Defined Assets for each step of your ETL process - Use Dagster's UI to run and monitor your pipeline -## Next Steps +## Next steps To expand on this tutorial, you could: + - Add more complex transformations - Implement error handling and retries - Create a schedule to run your pipeline periodically diff --git a/docs/docs-next/src/components/CodeExample.tsx b/docs/docs-next/src/components/CodeExample.tsx index e14da414aee17..ec87b14233136 100644 --- a/docs/docs-next/src/components/CodeExample.tsx +++ b/docs/docs-next/src/components/CodeExample.tsx @@ -1,5 +1,5 @@ -import React from 'react'; -import CodeBlock from '@theme/CodeBlock'; +import React from "react"; +import CodeBlock from "@theme/CodeBlock"; interface CodeExampleProps { filePath: string; @@ -7,38 +7,50 @@ interface CodeExampleProps { title?: string; } -const CodeExample: React.FC = ({ filePath, language, title }) => { - const [content, setContent] = React.useState(''); +const CodeExample: React.FC = ({ + filePath, + language, + title, +}) => { + const [content, setContent] = React.useState(""); const [error, setError] = React.useState(null); React.useEffect(() => { // Adjust the import path to start from the docs directory import(`!!raw-loader!/docs/${filePath}`) .then((module) => { - const lines = module.default.split('\n'); - const mainIndex = lines.findIndex(line => line.trim().startsWith('if __name__ == ')); - const strippedContent = mainIndex !== -1 ? lines.slice(0, mainIndex).join('\n') : module.default; + const lines = module.default.split("\n"); + const mainIndex = lines.findIndex((line) => + line.trim().startsWith("if __name__ == "), + ); + const strippedContent = + mainIndex !== -1 + ? lines.slice(0, mainIndex).join("\n") + : module.default; setContent(strippedContent); setError(null); }) .catch((error) => { console.error(`Error loading file: ${filePath}`, error); - setError(`Failed to load file: ${filePath}. 
Please check if the file exists and the path is correct.`); + setError( + `Failed to load file: ${filePath}. Please check if the file exists and the path is correct.`, + ); }); }, [filePath]); if (error) { - return
{error}
; + return ( +
+ {error} +
+ ); } return ( - - {content || 'Loading...'} + + {content || "Loading..."} ); }; -export default CodeExample; \ No newline at end of file +export default CodeExample; diff --git a/docs/docs-next/src/components/PyObject.tsx b/docs/docs-next/src/components/PyObject.tsx index a2524fd4b6dfc..de258e7fb5cba 100644 --- a/docs/docs-next/src/components/PyObject.tsx +++ b/docs/docs-next/src/components/PyObject.tsx @@ -9,16 +9,22 @@ export const PyObject: React.FunctionComponent<{ displayText?: string; pluralize?: boolean; decorator?: boolean; -}> = ({object, method, displayText, pluralize = false, decorator = false}) => { +}> = ({ + object, + method, + displayText, + pluralize = false, + decorator = false, +}) => { let textValue = displayText || object; if (pluralize) { - textValue += 's'; + textValue += "s"; } if (decorator) { - textValue = '@' + textValue; + textValue = "@" + textValue; } if (method) { - textValue += '.' + method; + textValue += "." + method; } const handleClick = (e: React.MouseEvent) => { @@ -36,4 +42,4 @@ export const PyObject: React.FunctionComponent<{ {textValue} ); -} +}; diff --git a/docs/docs-next/src/styles/index.tsx b/docs/docs-next/src/styles/index.tsx index 3ac6698859c8a..68a38518c8986 100644 --- a/docs/docs-next/src/styles/index.tsx +++ b/docs/docs-next/src/styles/index.tsx @@ -1,8 +1,7 @@ - export default function HomeWrapper() { - return ( -
-

Hello, World!

-
- ); -} \ No newline at end of file + return ( +
+

Hello, World!

+
+ ); +} diff --git a/docs/docs-next/src/templates/concept.md b/docs/docs-next/src/templates/concept.md index 79f5b515a53a5..4969d322ad0b8 100644 --- a/docs/docs-next/src/templates/concept.md +++ b/docs/docs-next/src/templates/concept.md @@ -93,4 +93,4 @@ From here, you can: ## Related - \ No newline at end of file + diff --git a/docs/docs-next/src/templates/example-reference.md b/docs/docs-next/src/templates/example-reference.md index 09f4df9301e20..b829418063a31 100644 --- a/docs/docs-next/src/templates/example-reference.md +++ b/docs/docs-next/src/templates/example-reference.md @@ -29,16 +29,16 @@ def logs_then_skips(context): -| | | -|-|-| -| Notes | | -| Related docs | | -| APIs in this example | | +| | | +| -------------------- | --- | +| Notes | | +| Related docs | | +| APIs in this example | | --- -import InspirationList from '../partials/_InspirationList.md'; +import InspirationList from '../partials/\_InspirationList.md'; - \ No newline at end of file + diff --git a/docs/docs-next/src/templates/guide-no-steps.md b/docs/docs-next/src/templates/guide-no-steps.md index aeaf84c948fca..92bca8b94ba51 100644 --- a/docs/docs-next/src/templates/guide-no-steps.md +++ b/docs/docs-next/src/templates/guide-no-steps.md @@ -25,16 +25,16 @@ To follow the steps in this guide, you'll need: ## Title that describes this section - --- ## Related -[List of links to related content] \ No newline at end of file +[List of links to related content] diff --git a/docs/docs-next/src/templates/guide-with-steps.md b/docs/docs-next/src/templates/guide-with-steps.md index ced99f32d16ff..6fd0aa836e6ec 100644 --- a/docs/docs-next/src/templates/guide-with-steps.md +++ b/docs/docs-next/src/templates/guide-with-steps.md @@ -25,11 +25,11 @@ To follow the steps in this guide, you'll need: ## Step 1: Title that describes what this step will do {#step-1} - @@ -46,4 +46,4 @@ For section / step heaings: ## Related -[List of links to related content] \ No newline at end of file +[List of links to 
related content] diff --git a/docs/docs-next/src/theme/MDXComponents.tsx b/docs/docs-next/src/theme/MDXComponents.tsx index 37c4ed3b0c833..8d19ef33ea206 100644 --- a/docs/docs-next/src/theme/MDXComponents.tsx +++ b/docs/docs-next/src/theme/MDXComponents.tsx @@ -2,8 +2,8 @@ import MDXComponents from "@theme-original/MDXComponents"; import { PyObject } from "../components/PyObject"; import CodeExample from "../components/CodeExample"; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; export default { // Re-use the default mapping ...MDXComponents, @@ -11,4 +11,4 @@ export default { Tabs, TabItem, CodeExample, -}; \ No newline at end of file +}; diff --git a/docs/docs-next/tsconfig.json b/docs/docs-next/tsconfig.json index b1b36ccc812bc..89eec2beff4c7 100644 --- a/docs/docs-next/tsconfig.json +++ b/docs/docs-next/tsconfig.json @@ -3,20 +3,9 @@ "compilerOptions": { "baseUrl": "./", "paths": { - "@/*": [ - "src/*" - ] + "@/*": ["src/*"] }, - "types": [ - "@docusaurus/theme-classic", - "node" - ] + "types": ["@docusaurus/theme-classic", "node"] }, - "exclude": [ - "**/node_modules/*", - "blog", - "docs", - "build", - "i18n" - ], -} \ No newline at end of file + "exclude": ["**/node_modules/*", "blog", "docs", "build", "i18n"] +}