diff --git a/docs/dagster-university/next-env.d.ts b/docs/dagster-university/next-env.d.ts index 4f11a03dc6cc3..a4a7b3f5cfa2f 100644 --- a/docs/dagster-university/next-env.d.ts +++ b/docs/dagster-university/next-env.d.ts @@ -2,4 +2,4 @@ /// // NOTE: This file should not be edited -// see https://nextjs.org/docs/basic-features/typescript for more information. +// see https://nextjs.org/docs/pages/building-your-application/configuring/typescript for more information. diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md index 92eab5b00a8b5..e06be9dc0e1e4 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-taxi-zones-asset.md @@ -41,6 +41,13 @@ def taxi_zones() -> None: ); """ - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) conn.execute(sql_query) ``` diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md index 8067d27dd281a..b5c5e90b1bfad 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-4/coding-practice-trips-by-week-asset.md @@ -62,12 +62,20 @@ from datetime import datetime, timedelta from . 
import constants import pandas as pd +from dagster._utils.backoff import backoff @asset( deps=["taxi_trips"] ) def trips_by_week() -> None: - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) current_date = datetime.strptime("2023-03-01", constants.DATE_FORMAT) end_date = datetime.strptime("2023-04-01", constants.DATE_FORMAT) diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md b/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md index 0bf9ced624484..ec69d9cb43690 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-4/loading-data-into-a-database.md @@ -13,6 +13,7 @@ Now that you have a query that produces an asset, let’s use Dagster to manage ```python import duckdb import os + from dagster._utils.backoff import backoff ``` 2. Copy and paste the code below into the bottom of the `trips.py` file. Note how this code looks similar to the asset definition code for the `taxi_trips_file` and the `taxi_zones` assets: @@ -42,7 +43,14 @@ Now that you have a query that produces an asset, let’s use Dagster to manage ); """ - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) conn.execute(sql_query) ``` @@ -54,7 +62,7 @@ Now that you have a query that produces an asset, let’s use Dagster to manage 3. Next, a variable named `sql_query` is created. This variable contains a SQL query that creates a table named `trips`, which sources its data from the `data/raw/taxi_trips_2023-03.parquet` file. This is the file created by the `taxi_trips_file` asset. 
- 4. A variable named `conn` is created, which defines the connection to the DuckDB database in the project. To do this, it uses the `.connect` method from the `duckdb` library, passing in the `DUCKDB_DATABASE` environment variable to tell DuckDB where the database is located. + 4. A variable named `conn` is created, which defines the connection to the DuckDB database in the project. To do this, we first wrap everything with the Dagster utility function `backoff`. Using the backoff function ensures that multiple assets can use DuckDB safely without locking resources. The backoff function takes in the function we want to call, in this case the `.connect` method from the `duckdb` library, any errors to retry on (`RuntimeError` and `duckdb.IOException`), the max number of retries, and finally the args to supply to the `.connect` DuckDB method. In this case we are passing in the `DUCKDB_DATABASE` environment variable to tell DuckDB where the database is located. The `DUCKDB_DATABASE` environment variable, sourced from your project’s `.env` file, resolves to `data/staging/data.duckdb`. **Note**: We set up this file in Lesson 2 - refer to this lesson if you need a refresher. If this file isn’t set up correctly, the materialization will result in an error. diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md b/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md index 311748941755c..9dadbf149bfa1 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-6/setting-up-a-database-resource.md @@ -14,7 +14,14 @@ Throughout this module, you’ve used DuckDB to store and transform your data. E ) def taxi_trips() -> None: ... 
- conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) ... ``` diff --git a/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md b/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md index 7e27a7e555ab1..12b1ffbcc547e 100644 --- a/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md +++ b/docs/dagster-university/pages/dagster-essentials/lesson-6/using-resources-in-assets.md @@ -48,7 +48,14 @@ def taxi_trips() -> None: ); """ - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) conn.execute(sql_query) ``` @@ -100,7 +107,14 @@ To refactor `taxi_trips` to use the `database` resource, we had to: 3. Replace the lines that connect to DuckDB and execute a query: ```python - conn = duckdb.connect(os.getenv("DUCKDB_DATABASE")) + conn = backoff( + fn=duckdb.connect, + retry_on=(RuntimeError, duckdb.IOException), + kwargs={ + "database": os.getenv("DUCKDB_DATABASE"), + }, + max_retries=10, + ) conn.execute(query) ``` @@ -111,6 +125,8 @@ To refactor `taxi_trips` to use the `database` resource, we had to: conn.execute(query) ``` + Notice that we no longer need to use the `backoff` function. The Dagster `DuckDBResource` handles this functionality for us. + --- ## Before you continue