From 2f0664d0410da7463f2d08ce2405c1948c2d64ad Mon Sep 17 00:00:00 2001 From: Daniel <4836770+bdart@users.noreply.github.com> Date: Fri, 13 Sep 2024 19:36:22 +0200 Subject: [PATCH] Add: Support for loading multiple modules (#22327) ## Summary & Motivation > Copied the same motivation that was stated back here, where it wasn't proceeded further https://github.com/dagster-io/dagster/pull/12551 It would be nice to load multiple modules using pyproject.toml so users don't have to type out all the modules they want to load using the CLI command. See issue https://github.com/dagster-io/dagster/issues/11439 and discussion https://github.com/dagster-io/dagster/discussions/11167. I tried to add the recommendations mentioned here: https://github.com/dagster-io/dagster/pull/12551#issuecomment-1460574415 ## How I Tested These Changes I added a new unit test in dagster_tests/cli_tests/test_toml_loading.py ## Changes: ### Add: Support for loading multiple modules from `pyproject.toml` in Dagster (#4bad886) ------------------------------------------------------------------------------------- * Updated documentation to include example of loading multiple modules (`DagsterDevTabs.mdx`) * Added `is_valid_modules_list` function to validate module lists in `pyproject.toml` (`load_target.py`) * Modified `get_origins_from_toml` to support loading multiple modules from `pyproject.toml` (`load_target.py`) * Added new test case `test_load_multiple_modules_from_toml` to verify loading multiple modules (`test_toml_loading.py`) * Created a new TOML test file `multiple_modules.toml` to simulate loading multiple modules (`toml_tests/multiple_modules.toml`) --- .../mdx/includes/dagster/DagsterDevTabs.mdx | 7 ++ .../dagster/_core/workspace/load_target.py | 70 +++++++++++++++++-- .../toml_tests/invalid_empty_modules.toml | 2 + .../toml_tests/invalid_modules_dict.toml | 8 +++ .../mixed_modules_and_module_name.toml | 6 ++ .../toml_tests/multiple_modules.toml | 5 ++ .../toml_tests/test_toml_loading.py | 69 +++++++++++++++++- 7 files changed, 161 insertions(+), 6 deletions(-) create mode 100644 python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_empty_modules.toml create mode 100644 python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_modules_dict.toml create mode 100644 python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/mixed_modules_and_module_name.toml create mode 100644 python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/multiple_modules.toml diff --git a/docs/next/components/mdx/includes/dagster/DagsterDevTabs.mdx b/docs/next/components/mdx/includes/dagster/DagsterDevTabs.mdx index 9286434cbb8e6..5de04bc188bc6 100644 --- a/docs/next/components/mdx/includes/dagster/DagsterDevTabs.mdx +++ b/docs/next/components/mdx/includes/dagster/DagsterDevTabs.mdx @@ -65,6 +65,13 @@ Instead of this: dagster dev -m your_module_name.definitions ``` +You can also include multiple modules at a time using the `pyproject.toml` file, where each module will be loaded as a code location: + +```toml +[tool.dagster] +modules = [{ type = "module", name = "foo" }, { type = "module", name = "bar" }] +``` + --- diff --git a/python_modules/dagster/dagster/_core/workspace/load_target.py b/python_modules/dagster/dagster/_core/workspace/load_target.py index 97b0ae199aac3..33f20190645b8 100644 --- a/python_modules/dagster/dagster/_core/workspace/load_target.py +++ b/python_modules/dagster/dagster/_core/workspace/load_target.py @@ -1,6 +1,6 @@ import os from abc import ABC, abstractmethod -from typing import NamedTuple, Optional, Sequence +from typing import Dict, List, NamedTuple, Optional, Sequence import tomli @@ -24,7 +24,8 @@ def create_origins(self) -> Sequence[CodeLocationOrigin]: class CompositeTarget( - NamedTuple("CompositeTarget", [("targets", Sequence[WorkspaceLoadTarget])]), WorkspaceLoadTarget + NamedTuple("CompositeTarget", [("targets", Sequence[WorkspaceLoadTarget])]), + WorkspaceLoadTarget, ): def create_origins(self): origins = [] @@ -40,21 +41,80 @@ def create_origins(self) -> Sequence[CodeLocationOrigin]: return location_origins_from_yaml_paths(self.paths) -def get_origins_from_toml(path: str) -> Sequence[ManagedGrpcPythonEnvCodeLocationOrigin]: +def validate_dagster_block_for_module_name_or_modules(dagster_block): + module_name_present = "module_name" in dagster_block and isinstance( + dagster_block.get("module_name"), str + ) + modules_present = "modules" in dagster_block and isinstance(dagster_block.get("modules"), list) + + if module_name_present and modules_present: + # Here we have the check only for list; to be a bit more forgiving in comparison to 'is_valid_modules_list' in case it's an empty list next to 'module_name' existance + if len(dagster_block["modules"]) > 0: + raise ValueError( + "Only one of 'module_name' or 'modules' should be specified, not both." + ) + + if modules_present and len(dagster_block.get("modules")) == 0: + raise ValueError("'modules' list should not be empty if specified.") + + return True + + +def is_valid_modules_list(modules: List[Dict[str, str]]) -> bool: + # Could be skipped theorectically, but double check maybe useful, if this functions finds it's way elsewhere + if not isinstance(modules, list): + raise ValueError("Modules should be a list.") + + for index, item in enumerate(modules): + if not isinstance(item, dict): + raise ValueError(f"Item at index {index} is not a dictionary.") + if "type" not in item: + raise ValueError(f"Dictionary at index {index} does not contain the key 'type'.") + if not isinstance(item["type"], str): + raise ValueError(f"The 'type' value in dictionary at index {index} is not a string.") + if "name" not in item: + raise ValueError(f"Dictionary at index {index} does not contain the key 'name'.") + if not isinstance(item["name"], str): + raise ValueError(f"The 'name' value in dictionary at index {index} is not a string.") + + return True + + +def get_origins_from_toml( + path: str, +) -> Sequence[ManagedGrpcPythonEnvCodeLocationOrigin]: with open(path, "rb") as f: data = tomli.load(f) if not isinstance(data, dict): return [] dagster_block = data.get("tool", {}).get("dagster", {}) + + if "module_name" in dagster_block or "modules" in dagster_block: + assert validate_dagster_block_for_module_name_or_modules(dagster_block) is True + if "module_name" in dagster_block: return ModuleTarget( - module_name=dagster_block["module_name"], + module_name=dagster_block.get("module_name"), attribute=None, working_directory=os.getcwd(), location_name=dagster_block.get("code_location_name"), ).create_origins() - return [] + elif "modules" in dagster_block and is_valid_modules_list(dagster_block.get("modules")): + origins = [] + for module in dagster_block.get("modules"): + if module.get("type") == "module": + origins.extend( + ModuleTarget( + module_name=module.get("name"), + attribute=None, + working_directory=os.getcwd(), + location_name=dagster_block.get("code_location_name"), + ).create_origins() + ) + return origins + else: + return [] class PyProjectFileTarget(NamedTuple("PyProjectFileTarget", [("path", str)]), WorkspaceLoadTarget): diff --git a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_empty_modules.toml b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_empty_modules.toml new file mode 100644 index 0000000000000..8c47941014854 --- /dev/null +++ b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_empty_modules.toml @@ -0,0 +1,2 @@ +[tool.dagster] +modules = [] \ No newline at end of file diff --git a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_modules_dict.toml b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_modules_dict.toml new file mode 100644 index 0000000000000..01eb788c0c804 --- /dev/null +++ b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/invalid_modules_dict.toml @@ -0,0 +1,8 @@ +[tool.dagster] +modules = [ + { name = 1 }, + { name = "foo" }, + { type = "module" }, + { type = true }, + { bar = "foo" }, +] \ No newline at end of file diff --git a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/mixed_modules_and_module_name.toml b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/mixed_modules_and_module_name.toml new file mode 100644 index 0000000000000..4a3aad81b4000 --- /dev/null +++ b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/mixed_modules_and_module_name.toml @@ -0,0 +1,6 @@ +[tool.dagster] +module_name = "foo" +modules = [ + { type = "module", name = "foo" }, + { type = "module", name = "bar" } +] \ No newline at end of file diff --git a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/multiple_modules.toml b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/multiple_modules.toml new file mode 100644 index 0000000000000..4bad8866e03cd --- /dev/null +++ b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/multiple_modules.toml @@ -0,0 +1,5 @@ +[tool.dagster] +modules = [ + { type = "module", name = "foo" }, + { type = "module", name = "bar" } +] ## names of project's Python modules \ No newline at end of file diff --git a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/test_toml_loading.py b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/test_toml_loading.py index e3c627b5b4084..4b3fbf4d4744e 100644 --- a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/test_toml_loading.py +++ b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/toml_tests/test_toml_loading.py @@ -1,5 +1,6 @@ -from dagster._core.workspace.load_target import get_origins_from_toml +from dagster._core.workspace.load_target import get_origins_from_toml, is_valid_modules_list from dagster._utils import file_relative_path +from pytest import raises def test_load_python_module_from_toml(): @@ -22,3 +23,69 @@ def test_load_empty_toml(): def test_load_toml_with_other_stuff(): assert get_origins_from_toml(file_relative_path(__file__, "other_stuff.toml")) == [] + + +def test_load_multiple_modules_from_toml(): + origins = get_origins_from_toml(file_relative_path(__file__, "multiple_modules.toml")) + assert len(origins) == 2 + + module_names = {origin.loadable_target_origin.module_name for origin in origins} + expected_module_names = {"foo", "bar"} + + assert module_names == expected_module_names + for origin in origins: + assert origin.location_name in expected_module_names + + +def test_load_mixed_modules_and_module_name_from_toml(): + with raises( + ValueError, + match="Only one of 'module_name' or 'modules' should be specified, not both.", + ): + get_origins_from_toml(file_relative_path(__file__, "mixed_modules_and_module_name.toml")) + + +def test_load_invalid_empty_modules_from_toml(): + with raises(ValueError, match="'modules' list should not be empty if specified."): + get_origins_from_toml(file_relative_path(__file__, "invalid_empty_modules.toml")) + + +def test_is_valid_modules_list_from_toml(): + # Only matchess first error of many, rest is covered below + with raises(ValueError, match="Dictionary at index 0 does not contain the key 'type'."): + get_origins_from_toml(file_relative_path(__file__, "invalid_modules_dict.toml")) + + +def test_is_valid_modules_list_not_a_list(): + with raises(ValueError, match="Modules should be a list."): + is_valid_modules_list("not a list") + + +def test_is_valid_modules_list_item_not_dict(): + modules = ["not a dictionary"] + with raises(ValueError, match="Item at index 0 is not a dictionary."): + is_valid_modules_list(modules) + + +def test_is_valid_modules_list_missing_type(): + modules = [{"name": "foo"}] + with raises(ValueError, match="Dictionary at index 0 does not contain the key 'type'."): + is_valid_modules_list(modules) + + +def test_is_valid_modules_list_type_not_string(): + modules = [{"type": 123, "name": "foo"}] + with raises(ValueError, match="The 'type' value in dictionary at index 0 is not a string."): + is_valid_modules_list(modules) + + +def test_is_valid_modules_list_missing_name(): + modules = [{"type": "module"}] + with raises(ValueError, match="Dictionary at index 0 does not contain the key 'name'."): + is_valid_modules_list(modules) + + +def test_is_valid_modules_list_name_not_string(): + modules = [{"type": "module", "name": 123}] + with raises(ValueError, match="The 'name' value in dictionary at index 0 is not a string."): + is_valid_modules_list(modules)