Skip to content

Commit

Permalink
Add: Support for loading multiple modules (#22327)
Browse files Browse the repository at this point in the history
## Summary & Motivation

> Copied the same motivation that was stated back here, where it wasn't
proceeded further #12551

It would be nice to load multiple modules using pyproject.toml so users
don't have to type out all the modules they want to load using the CLI
command. See issue #11439
and discussion #11167.

I tried to add the recommendations mentioned here:
#12551 (comment)

## How I Tested These Changes
I added a new unit test in dagster_tests/cli_tests/test_toml_loading.py


## Changes:
### Add: Support for loading multiple modules from `pyproject.toml` in
Dagster (#4bad886)

-------------------------------------------------------------------------------------
* Updated documentation to include example of loading multiple modules
(`DagsterDevTabs.mdx`)
* Added `is_valid_modules_list` function to validate module lists in
`pyproject.toml` (`load_target.py`)
* Modified `get_origins_from_toml` to support loading multiple modules
from `pyproject.toml` (`load_target.py`)
* Added new test case `test_load_multiple_modules_from_toml` to verify
loading multiple modules (`test_toml_loading.py`)
* Created a new TOML test file `multiple_modules.toml` to simulate
loading multiple modules (`toml_tests/multiple_modules.toml`)
  • Loading branch information
bdart authored Sep 13, 2024
1 parent 5de01b2 commit 2f0664d
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 6 deletions.
7 changes: 7 additions & 0 deletions docs/next/components/mdx/includes/dagster/DagsterDevTabs.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ Instead of this:
dagster dev -m your_module_name.definitions
```

You can also include multiple modules at a time using the `pyproject.toml` file, where each module will be loaded as a code location:

```toml
[tool.dagster]
modules = [{ type = "module", name = "foo" }, { type = "module", name = "bar" }]
```

---

</TabItem>
Expand Down
70 changes: 65 additions & 5 deletions python_modules/dagster/dagster/_core/workspace/load_target.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from abc import ABC, abstractmethod
from typing import NamedTuple, Optional, Sequence
from typing import Dict, List, NamedTuple, Optional, Sequence

import tomli

Expand All @@ -24,7 +24,8 @@ def create_origins(self) -> Sequence[CodeLocationOrigin]:


class CompositeTarget(
NamedTuple("CompositeTarget", [("targets", Sequence[WorkspaceLoadTarget])]), WorkspaceLoadTarget
NamedTuple("CompositeTarget", [("targets", Sequence[WorkspaceLoadTarget])]),
WorkspaceLoadTarget,
):
def create_origins(self):
origins = []
Expand All @@ -40,21 +41,80 @@ def create_origins(self) -> Sequence[CodeLocationOrigin]:
return location_origins_from_yaml_paths(self.paths)


def get_origins_from_toml(path: str) -> Sequence[ManagedGrpcPythonEnvCodeLocationOrigin]:
def validate_dagster_block_for_module_name_or_modules(dagster_block):
module_name_present = "module_name" in dagster_block and isinstance(
dagster_block.get("module_name"), str
)
modules_present = "modules" in dagster_block and isinstance(dagster_block.get("modules"), list)

if module_name_present and modules_present:
# Here we have the check only for list; to be a bit more forgiving in comparison to 'is_valid_modules_list' in case it's an empty list next to 'module_name' existance
if len(dagster_block["modules"]) > 0:
raise ValueError(
"Only one of 'module_name' or 'modules' should be specified, not both."
)

if modules_present and len(dagster_block.get("modules")) == 0:
raise ValueError("'modules' list should not be empty if specified.")

return True


def is_valid_modules_list(modules: List[Dict[str, str]]) -> bool:
# Could be skipped theorectically, but double check maybe useful, if this functions finds it's way elsewhere
if not isinstance(modules, list):
raise ValueError("Modules should be a list.")

for index, item in enumerate(modules):
if not isinstance(item, dict):
raise ValueError(f"Item at index {index} is not a dictionary.")
if "type" not in item:
raise ValueError(f"Dictionary at index {index} does not contain the key 'type'.")
if not isinstance(item["type"], str):
raise ValueError(f"The 'type' value in dictionary at index {index} is not a string.")
if "name" not in item:
raise ValueError(f"Dictionary at index {index} does not contain the key 'name'.")
if not isinstance(item["name"], str):
raise ValueError(f"The 'name' value in dictionary at index {index} is not a string.")

return True


def get_origins_from_toml(
path: str,
) -> Sequence[ManagedGrpcPythonEnvCodeLocationOrigin]:
with open(path, "rb") as f:
data = tomli.load(f)
if not isinstance(data, dict):
return []

dagster_block = data.get("tool", {}).get("dagster", {})

if "module_name" in dagster_block or "modules" in dagster_block:
assert validate_dagster_block_for_module_name_or_modules(dagster_block) is True

if "module_name" in dagster_block:
return ModuleTarget(
module_name=dagster_block["module_name"],
module_name=dagster_block.get("module_name"),
attribute=None,
working_directory=os.getcwd(),
location_name=dagster_block.get("code_location_name"),
).create_origins()
return []
elif "modules" in dagster_block and is_valid_modules_list(dagster_block.get("modules")):
origins = []
for module in dagster_block.get("modules"):
if module.get("type") == "module":
origins.extend(
ModuleTarget(
module_name=module.get("name"),
attribute=None,
working_directory=os.getcwd(),
location_name=dagster_block.get("code_location_name"),
).create_origins()
)
return origins
else:
return []


class PyProjectFileTarget(NamedTuple("PyProjectFileTarget", [("path", str)]), WorkspaceLoadTarget):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tool.dagster]
modules = []
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[tool.dagster]
modules = [
{ name = 1 },
{ name = "foo" },
{ type = "module" },
{ type = true },
{ bar = "foo" },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[tool.dagster]
module_name = "foo"
modules = [
{ type = "module", name = "foo" },
{ type = "module", name = "bar" }
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[tool.dagster]
modules = [
{ type = "module", name = "foo" },
{ type = "module", name = "bar" }
] ## names of project's Python modules
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dagster._core.workspace.load_target import get_origins_from_toml
from dagster._core.workspace.load_target import get_origins_from_toml, is_valid_modules_list
from dagster._utils import file_relative_path
from pytest import raises


def test_load_python_module_from_toml():
Expand All @@ -22,3 +23,69 @@ def test_load_empty_toml():

def test_load_toml_with_other_stuff():
assert get_origins_from_toml(file_relative_path(__file__, "other_stuff.toml")) == []


def test_load_multiple_modules_from_toml():
origins = get_origins_from_toml(file_relative_path(__file__, "multiple_modules.toml"))
assert len(origins) == 2

module_names = {origin.loadable_target_origin.module_name for origin in origins}
expected_module_names = {"foo", "bar"}

assert module_names == expected_module_names
for origin in origins:
assert origin.location_name in expected_module_names


def test_load_mixed_modules_and_module_name_from_toml():
with raises(
ValueError,
match="Only one of 'module_name' or 'modules' should be specified, not both.",
):
get_origins_from_toml(file_relative_path(__file__, "mixed_modules_and_module_name.toml"))


def test_load_invalid_empty_modules_from_toml():
with raises(ValueError, match="'modules' list should not be empty if specified."):
get_origins_from_toml(file_relative_path(__file__, "invalid_empty_modules.toml"))


def test_is_valid_modules_list_from_toml():
# Only matchess first error of many, rest is covered below
with raises(ValueError, match="Dictionary at index 0 does not contain the key 'type'."):
get_origins_from_toml(file_relative_path(__file__, "invalid_modules_dict.toml"))


def test_is_valid_modules_list_not_a_list():
with raises(ValueError, match="Modules should be a list."):
is_valid_modules_list("not a list")


def test_is_valid_modules_list_item_not_dict():
modules = ["not a dictionary"]
with raises(ValueError, match="Item at index 0 is not a dictionary."):
is_valid_modules_list(modules)


def test_is_valid_modules_list_missing_type():
modules = [{"name": "foo"}]
with raises(ValueError, match="Dictionary at index 0 does not contain the key 'type'."):
is_valid_modules_list(modules)


def test_is_valid_modules_list_type_not_string():
modules = [{"type": 123, "name": "foo"}]
with raises(ValueError, match="The 'type' value in dictionary at index 0 is not a string."):
is_valid_modules_list(modules)


def test_is_valid_modules_list_missing_name():
modules = [{"type": "module"}]
with raises(ValueError, match="Dictionary at index 0 does not contain the key 'name'."):
is_valid_modules_list(modules)


def test_is_valid_modules_list_name_not_string():
modules = [{"type": "module", "name": 123}]
with raises(ValueError, match="The 'name' value in dictionary at index 0 is not a string."):
is_valid_modules_list(modules)

1 comment on commit 2f0664d

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deploy preview for dagster-docs ready!

✅ Preview
https://dagster-docs-4zmbold61-elementl.vercel.app
https://master.dagster.dagster-docs.io

Built with commit 2f0664d.
This pull request is being automatically deployed with vercel-action

Please sign in to comment.