Skip to content

Commit

Permalink
make mdx-format
Browse files Browse the repository at this point in the history
  • Loading branch information
cmpadden committed Feb 22, 2024
1 parent 714a694 commit a65ef43
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 88 deletions.
79 changes: 37 additions & 42 deletions docs/content/getting-started/quickstart.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,22 @@ You can run the [Dagster Quickstart](https://github.com/dagster-io/dagster-quick

If you've followed the instructions in the README of the **Dagster Quickstart** project, you should have a running instance of Dagster! From here, we can run our data pipeline.

In the UI, click **Materialize All** when viewing the lineage of assets. In Dagster, *materialization* is the word we use to describe running the code associated with an asset
and producing an output.
In the UI, click **Materialize All** when viewing the lineage of assets. In Dagster, _materialization_ is the word we use to describe running the code associated with an asset and producing an output.

<Image
alt="HackerNews assets in Dagster's Asset Graph, unmaterialized"
src="/images/getting-started/quickstart/quickstart-unmaterialized.png"
width={1000}
height={816}
alt="HackerNews assets in Dagster's Asset Graph, unmaterialized"
src="/images/getting-started/quickstart/quickstart-unmaterialized.png"
width={1000}
height={816}
/>

That's it! You now have two materialized Dagster assets:

<Image
alt="HackerNews asset graph"
src="/images/getting-started/quickstart/quickstart.png"
width={2402}
height={1956}
alt="HackerNews asset graph"
src="/images/getting-started/quickstart/quickstart.png"
width={2402}
height={1956}
/>

But wait - there's more. Because the `hackernews_top_stories` asset returned some `metadata`, you can view the metadata right in the UI:
Expand All @@ -42,61 +41,57 @@ But wait - there's more. Because the `hackernews_top_stories` asset returned som
2. In the sidebar, click the **Show Markdown** link in the **Materialization in Last Run** section. This opens a preview of the pipeline result, allowing you to view the top 10 HackerNews stories:

<Image
alt="Markdown preview of HackerNews top 10 stories"
src="/images/getting-started/quickstart/hn-preview.png"
width={3444}
height={1754}
alt="Markdown preview of HackerNews top 10 stories"
src="/images/getting-started/quickstart/hn-preview.png"
width={3444}
height={1754}
/>

## The Code, Explained

```python file=/getting-started/quickstart.py
```python file=/getting-started/quickstart/assets.py
import json
import requests

import pandas as pd
import requests

from dagster import AssetExecutionContext, MetadataValue, asset
from dagster import (
MaterializeResult,
MetadataValue,
asset,
)
from dagster_quickstart.configurations import HNStoriesConfig


@asset
def hackernews_top_story_ids():
"""Get top stories from the HackerNews top stories endpoint.
API Docs: https://github.com/HackerNews/API#new-top-and-best-stories.
"""
top_story_ids = requests.get(
"https://hacker-news.firebaseio.com/v0/topstories.json"
).json()
def hackernews_top_story_ids(config: HNStoriesConfig):
"""Get top stories from the HackerNews top stories endpoint."""
top_story_ids = requests.get("https://hacker-news.firebaseio.com/v0/topstories.json").json()

with open("hackernews_top_story_ids.json", "w") as f:
json.dump(top_story_ids[:10], f)
with open(config.hn_top_story_ids_path, "w") as f:
json.dump(top_story_ids[: config.top_stories_limit], f)


# asset dependencies can be inferred from parameter names
@asset(deps=[hackernews_top_story_ids])
def hackernews_top_stories(context: AssetExecutionContext):
def hackernews_top_stories(config: HNStoriesConfig) -> MaterializeResult:
"""Get items based on story ids from the HackerNews items endpoint."""
with open("hackernews_top_story_ids.json", "r") as f:
with open(config.hn_top_story_ids_path, "r") as f:
hackernews_top_story_ids = json.load(f)

results = []
for item_id in hackernews_top_story_ids:
item = requests.get(
f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json"
).json()
item = requests.get(f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json").json()
results.append(item)

df = pd.DataFrame(results)
df.to_csv("hackernews_top_stories.csv")

# recorded metadata can be customized
metadata = {
"num_records": len(df),
"preview": MetadataValue.md(df[["title", "by", "url"]].to_markdown()),
}

context.add_output_metadata(metadata=metadata)
df.to_csv(config.hn_top_stories_path)

return MaterializeResult(
metadata={
"num_records": len(df),
"preview": MetadataValue.md(str(df[["title", "by", "url"]].to_markdown())),
}
)
```

---
Expand Down
46 changes: 0 additions & 46 deletions examples/docs_snippets/docs_snippets/getting-started/quickstart.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import json

import pandas as pd
import requests
from dagster_quickstart.configurations import HNStoriesConfig

from dagster import (
MaterializeResult,
MetadataValue,
asset,
)


@asset
def hackernews_top_story_ids(config: HNStoriesConfig):
    """Get top stories from the HackerNews top stories endpoint.

    Fetches the list of top story ids, keeps the first
    ``config.top_stories_limit`` of them, and writes that list as JSON to
    ``config.hn_top_story_ids_path``.

    Args:
        config: Run configuration supplying ``hn_top_story_ids_path`` (the
            output file path) and ``top_stories_limit`` (how many ids to keep).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    request_timeout_seconds = 30

    response = requests.get(
        "https://hacker-news.firebaseio.com/v0/topstories.json",
        timeout=request_timeout_seconds,
    )
    # Fail loudly instead of writing an error payload to disk as if it were ids.
    response.raise_for_status()
    top_story_ids = response.json()

    with open(config.hn_top_story_ids_path, "w") as f:
        json.dump(top_story_ids[: config.top_stories_limit], f)


@asset(deps=[hackernews_top_story_ids])
def hackernews_top_stories(config: HNStoriesConfig) -> MaterializeResult:
    """Get items based on story ids from the HackerNews items endpoint.

    Reads the story ids previously written to ``config.hn_top_story_ids_path``,
    fetches each item from the items endpoint, collects the items into a
    DataFrame, and writes it as CSV to ``config.hn_top_stories_path``.

    Args:
        config: Run configuration supplying ``hn_top_story_ids_path`` (the
            input file of story ids) and ``hn_top_stories_path`` (the CSV
            output path).

    Returns:
        A ``MaterializeResult`` whose metadata holds the number of records and
        a Markdown preview of the ``title``, ``by`` and ``url`` columns.

    Raises:
        requests.HTTPError: If an item request answers with an error status.
        requests.Timeout: If an item request does not answer within the timeout.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    request_timeout_seconds = 30

    with open(config.hn_top_story_ids_path, "r") as f:
        hackernews_top_story_ids = json.load(f)

    results = []
    for item_id in hackernews_top_story_ids:
        response = requests.get(
            f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json",
            timeout=request_timeout_seconds,
        )
        # Stop on an HTTP error rather than recording an error payload as a story.
        response.raise_for_status()
        results.append(response.json())

    df = pd.DataFrame(results)
    df.to_csv(config.hn_top_stories_path)

    # str() ensures MetadataValue.md is handed a plain string.
    preview = str(df[["title", "by", "url"]].to_markdown())

    return MaterializeResult(
        metadata={
            "num_records": len(df),
            "preview": MetadataValue.md(preview),
        }
    )

0 comments on commit a65ef43

Please sign in to comment.