From bd31eedaf4077784070bd5955c78235d50987b4c Mon Sep 17 00:00:00 2001 From: jamiedemaria Date: Thu, 29 Aug 2024 16:58:26 -0400 Subject: [PATCH] Guide for connecting to APIs (#23920) ## Summary & Motivation Guide for connecting to APIs using resources. I tried to overcorrect toward terseness, but very likely that this doesn't have enough explanation. Let me know and I can add more. ## How I Tested These Changes ## Changelog [New | Bug | Docs] `NOCHANGELOG` --- .../docs/guides/external-systems/apis.md | 78 ++++++++++++++++++- .../apis/env_var_configuration.py | 38 +++++++++ .../external-systems/apis/minimal_resource.py | 16 ++++ .../use_configurable_resource_in_asset.py | 41 ++++++++++ .../apis/use_minimal_resource_in_asset.py | 28 +++++++ 5 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/env_var_configuration.py create mode 100644 examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/minimal_resource.py create mode 100644 examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_configurable_resource_in_asset.py create mode 100644 examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_minimal_resource_in_asset.py diff --git a/docs/docs-beta/docs/guides/external-systems/apis.md b/docs/docs-beta/docs/guides/external-systems/apis.md index 6a068d1d1805c..bd2aa90048b1e 100644 --- a/docs/docs-beta/docs/guides/external-systems/apis.md +++ b/docs/docs-beta/docs/guides/external-systems/apis.md @@ -1,4 +1,80 @@ --- title: Connecting to APIs sidebar_position: 20 ---- \ No newline at end of file +--- + +When building a data pipeline, you'll likely need to connect to several external APIs, each with its own specific configuration and behavior. This guide demonstrates how to standardize your API connections and customize their configuration using Dagster resources. 
+ + +## What you'll learn + +- How to connect to an API using a Dagster resource +- How to use that resource in an asset +- How to configure a resource +- How to source configuration values from environment variables + +
+ Prerequisites + +To follow the steps in this guide, you'll need: + +- Familiarity with [asset definitions](/concepts/assets) +- Familiarity with [resources](/concepts/resources) +- The `requests` library installed: + ```bash + pip install requests + ``` + +
+ +## Step 1: Write a resource to connect to an API + +This example fetches the sunrise time for a given location from a REST API. + +Begin by defining a Dagster resource with a method to return the sunrise time for a location. In the first version of this resource, the location will be hard-coded to San Francisco International Airport. + + + + + +## Step 2: Use the resource in an asset + +To use the resource written in Step 1, you can provide it as a parameter to an asset after including it in the `Definitions` object: + + + +When you materialize `sfo_sunrise`, Dagster will provide an initialized `SunResource` to the `sun_resource` parameter. + + +## Step 3: Configure your resource +Many APIs have configuration you can set to customize your usage. Here is an updated version of the resource from Step 1 with configuration to allow for setting the query location: + + + +The configurable resource can be provided to an asset exactly as before. When the resource is initialized, you can pass values for each of the configuration options. + +When you materialize `sfo_sunrise`, Dagster will provide a `SunResource` initialized with the configuration values to the `sun_resource` parameter. + + +## Step 4: Source configuration values from environment variables +Resources can also be configured with environment variables. You can use Dagster's built-in `EnvVar` class to source configuration values from environment variables at materialization time. + +In this example, there is a new `home_sunrise` asset. Rather than hard-coding the location of your home, you can set it in environment variables, and configure the `SunResource` by reading those values: + + + +When you materialize `home_sunrise`, Dagster will read the values set for the `HOME_LATITUDE`, `HOME_LONGITUDE`, and `HOME_TIMEZONE` environment variables and initialize a `SunResource` with those values. + +The initialized `SunResource` will be provided to the `sun_resource` parameter. 
+ +:::note +You can also fetch environment variables using the `os` library. Dagster treats each approach to fetching environment variables differently, such as when they're fetched or how they display in the UI. Refer to the [Environment variables guide](/todo) for more information. +::: + + +## Next steps + +- [Authenticate to a resource](/guides/external-systems/authentication.md) +- [Use different resources in different execution environments](/todo) +- [Set environment variables in Dagster+](/todo) +- Learn what [Dagster-provided resources](/todo) are available to use diff --git a/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/env_var_configuration.py b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/env_var_configuration.py new file mode 100644 index 0000000000000..ebcff9c228eb6 --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/env_var_configuration.py @@ -0,0 +1,38 @@ +import requests + +import dagster as dg + + +class SunResource(dg.ConfigurableResource): + latitude: str + longitude: str + time_zone: str + + @property + def query_string(self) -> str: + return f"https://api.sunrise-sunset.org/json?lat={self.latitude}&lng={self.longitude}&date=today&tzid={self.time_zone}" + + def sunrise(self) -> str: + data = requests.get(self.query_string, timeout=5).json() + return data["results"]["sunrise"] + + +# highlight-start +@dg.asset +def home_sunrise(context: dg.AssetExecutionContext, sun_resource: SunResource) -> None: + sunrise = sun_resource.sunrise() + context.log.info(f"Sunrise at home is at {sunrise}.") + + +defs = dg.Definitions( + assets=[home_sunrise], + resources={ + "sun_resource": SunResource( + latitude=dg.EnvVar("HOME_LATITUDE"), + longitude=dg.EnvVar("HOME_LONGITUDE"), + time_zone=dg.EnvVar("HOME_TIMEZONE"), + ) + }, +) + +# highlight-end diff --git a/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/minimal_resource.py 
b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/minimal_resource.py new file mode 100644 index 0000000000000..076bf3b0569f1 --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/minimal_resource.py @@ -0,0 +1,16 @@ +import requests + +import dagster as dg + + +class SunResource(dg.ConfigurableResource): + @property + def query_string(self) -> str: + latitude = "37.615223" + longitude = "-122.389977" + time_zone = "America/Los_Angeles" + return f"https://api.sunrise-sunset.org/json?lat={latitude}&lng={longitude}&date=today&tzid={time_zone}" + + def sunrise(self) -> str: + data = requests.get(self.query_string, timeout=5).json() + return data["results"]["sunrise"] diff --git a/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_configurable_resource_in_asset.py b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_configurable_resource_in_asset.py new file mode 100644 index 0000000000000..2950e815f828a --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_configurable_resource_in_asset.py @@ -0,0 +1,41 @@ +import requests + +import dagster as dg + + +class SunResource(dg.ConfigurableResource): + # highlight-start + latitude: str + longitude: str + time_zone: str + + @property + def query_string(self) -> str: + return f"https://api.sunrise-sunset.org/json?lat={self.latitude}&lng={self.longitude}&date=today&tzid={self.time_zone}" + + # highlight-end + + def sunrise(self) -> str: + data = requests.get(self.query_string, timeout=5).json() + return data["results"]["sunrise"] + + +@dg.asset +def sfo_sunrise(context: dg.AssetExecutionContext, sun_resource: SunResource) -> None: + sunrise = sun_resource.sunrise() + context.log.info(f"Sunrise in San Francisco is at {sunrise}.") + + +# highlight-start +defs = dg.Definitions( + assets=[sfo_sunrise], + resources={ + "sun_resource": SunResource( + 
latitude="37.615223", + longitude="-122.389977", + time_zone="America/Los_Angeles", + ) + }, +) + +# highlight-end diff --git a/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_minimal_resource_in_asset.py b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_minimal_resource_in_asset.py new file mode 100644 index 0000000000000..93e73956d9be0 --- /dev/null +++ b/examples/docs_beta_snippets/docs_beta_snippets/guides/external-systems/apis/use_minimal_resource_in_asset.py @@ -0,0 +1,28 @@ +import requests + +import dagster as dg + + +class SunResource(dg.ConfigurableResource): + @property + def query_string(self) -> str: + latitude = "37.615223" + longitude = "-122.389977" + time_zone = "America/Los_Angeles" + return f"https://api.sunrise-sunset.org/json?lat={latitude}&lng={longitude}&date=today&tzid={time_zone}" + + def sunrise(self) -> str: + data = requests.get(self.query_string, timeout=5).json() + return data["results"]["sunrise"] + + +# highlight-start +@dg.asset +def sfo_sunrise(context: dg.AssetExecutionContext, sun_resource: SunResource) -> None: + sunrise = sun_resource.sunrise() + context.log.info(f"Sunrise in San Francisco is at {sunrise}.") + + +defs = dg.Definitions(assets=[sfo_sunrise], resources={"sun_resource": SunResource()}) + +# highlight-end