From 4637108ebd5e2a3e4e43672a404ebff1913c19b7 Mon Sep 17 00:00:00 2001 From: Sean Mackesey Date: Wed, 11 Dec 2024 20:10:30 -0500 Subject: [PATCH] [components] Improve docstrings and clean up code in dagster-dg (#26406) ## Summary & Motivation Improve the docstrings for `dg` commands. Also convert them to use `Path` instead of `str` where possible. ## How I Tested These Changes Existing test suite. --- .../dagster-dg/dagster_dg/cli/generate.py | 81 ++++++++++++++++--- .../dagster-dg/dagster_dg/cli/list.py | 10 +-- .../dagster-dg/dagster_dg/generate.py | 10 +-- .../libraries/dagster-dg/dagster_dg/utils.py | 22 +++-- .../cli_tests/test_generate_commands.py | 4 +- .../cli_tests/test_list_commands.py | 6 +- 6 files changed, 93 insertions(+), 40 deletions(-) diff --git a/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py b/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py index 5034c1b5943fd..a8c18bd490b86 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py @@ -25,9 +25,13 @@ def generate_cli() -> None: @generate_cli.command(name="deployment") -@click.argument("path", type=str) -def generate_deployment_command(path: str) -> None: - """Generate a Dagster deployment instance.""" +@click.argument("path", type=Path) +def generate_deployment_command(path: Path) -> None: + """Generate a Dagster deployment file structure. + + The deployment file structure includes a directory for code locations and configuration files + for deploying to Dagster Plus. + """ dir_abspath = os.path.abspath(path) if os.path.exists(dir_abspath): click.echo( @@ -42,15 +46,37 @@ def generate_deployment_command(path: str) -> None: @click.argument("name", type=str) @click.option("--use-editable-dagster", is_flag=True, default=False) def generate_code_location_command(name: str, use_editable_dagster: bool) -> None: - """Generate a Dagster code location inside a component.""" + """Generate a Dagster code location file structure and a uv-managed virtual environment scoped + to the code location. + + This command can be run inside or outside of a deployment directory. If run inside a deployment, + the code location will be created within the deployment directory's code location directory. + + The code location file structure defines a Python package with some pre-existing internal + structure: + + ├── + │ ├── __init__.py + │ ├── components + │ ├── definitions.py + │ └── lib + │ └── __init__.py + ├── _tests + │ └── __init__.py + └── pyproject.toml + + The `.components` directory holds components (which can be created with `dg generate + component`). The `.lib` directory holds custom component types scoped to the code + location (which can be created with `dg generate component-type`). + """ if is_inside_deployment_project(Path.cwd()): context = DeploymentProjectContext.from_path(Path.cwd()) if context.has_code_location(name): click.echo(click.style(f"A code location named {name} already exists.", fg="red")) sys.exit(1) - code_location_path = os.path.join(context.code_location_root_path, name) + code_location_path = context.code_location_root_path / name else: - code_location_path = os.path.join(Path.cwd(), name) + code_location_path = Path.cwd() / name if use_editable_dagster: if "DAGSTER_GIT_REPO_DIR" not in os.environ: @@ -71,11 +97,15 @@ def generate_code_location_command(name: str, use_editable_dagster: bool) -> Non @generate_cli.command(name="component-type") @click.argument("name", type=str) def generate_component_type_command(name: str) -> None: - """Generate a Dagster component instance.""" + """Generate a scaffold of a custom Dagster component type. + + This command must be run inside a Dagster code location directory. The component type scaffold + will be generated in submodule `.lib.`. + """ if not is_inside_code_location_project(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) @@ -85,13 +115,16 @@ def generate_component_type_command(name: str) -> None: click.echo(click.style(f"A component type named `{name}` already exists.", fg="red")) sys.exit(1) - generate_component_type(context.component_types_root_path, name) + generate_component_type(Path(context.component_types_root_path), name) @generate_cli.command(name="component") -@click.argument("component_type", type=str) +@click.argument( + "component_type", + type=str, +) @click.argument("component_name", type=str) -@click.option("--json-params", type=str, default=None) +@click.option("--json-params", type=str, default=None, help="JSON string of component parameters.") @click.argument("extra_args", nargs=-1, type=str) def generate_component_command( component_type: str, @@ -99,10 +132,32 @@ def generate_component_command( json_params: Optional[str], extra_args: Tuple[str, ...], ) -> None: + """Generate a scaffold of a Dagster component. + + This command must be run inside a Dagster code location directory. The component scaffold will be + generated in submodule `.components.`. + + The COMPONENT_TYPE must be a registered component type in the code location environment. + You can view all registered component types with `dg list component-types`. The COMPONENT_NAME + will be used to name the submodule created under .components. + + Components can optionally be passed generate parameters. There are two ways to do this: + + - Passing --json-params with a JSON string of parameters. For example: + + dg generate component foo.bar my_component --json-params '{"param1": "value", "param2": "value"}'`. + + - Passing key-value pairs as space-separated EXTRA_ARGS after `--`. For example: + + dg generate component foo.bar my_component -- param1=value param2=value + + When key-value pairs are used, the value type will be inferred from the + underlying component generation schema. + """ if not is_inside_code_location_project(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) @@ -120,7 +175,7 @@ def generate_component_command( sys.exit(1) generate_component_instance( - context.component_instances_root_path, + Path(context.component_instances_root_path), component_name, component_type, json_params, diff --git a/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py b/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py index 1cb7f0a91d2de..3ad2e561b16af 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/cli/list.py @@ -21,7 +21,7 @@ def list_code_locations_command() -> None: """List code locations in the current deployment.""" if not is_inside_deployment_project(Path.cwd()): click.echo( - click.style("This command must be run inside a Dagster deployment project.", fg="red") + click.style("This command must be run inside a Dagster deployment directory.", fg="red") ) sys.exit(1) @@ -32,11 +32,11 @@ def list_code_locations_command() -> None: @list_cli.command(name="component-types") def list_component_types_command() -> None: - """List registered Dagster components.""" + """List registered Dagster components in the current code location environment.""" if not is_inside_code_location_project(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) @@ -48,11 +48,11 @@ def list_component_types_command() -> None: @list_cli.command(name="components") def list_components_command() -> None: - """List Dagster component instances in a code location.""" + """List Dagster component instances defined in the current code location.""" if not is_inside_code_location_project(Path.cwd()): click.echo( click.style( - "This command must be run inside a Dagster code location project.", fg="red" + "This command must be run inside a Dagster code location directory.", fg="red" ) ) sys.exit(1) diff --git a/python_modules/libraries/dagster-dg/dagster_dg/generate.py b/python_modules/libraries/dagster-dg/dagster_dg/generate.py index 1ca5c72bad6be..f469b9c824e94 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/generate.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/generate.py @@ -13,7 +13,7 @@ ) -def generate_deployment(path: str) -> None: +def generate_deployment(path: Path) -> None: click.echo(f"Creating a Dagster deployment at {path}.") generate_subtree( @@ -25,7 +25,7 @@ def generate_deployment(path: str) -> None: ) -def generate_code_location(path: str, editable_dagster_root: Optional[str] = None) -> None: +def generate_code_location(path: Path, editable_dagster_root: Optional[str] = None) -> None: click.echo(f"Creating a Dagster code location at {path}.") # Temporarily we always set an editable dagster root. This is needed while the packages are not @@ -65,7 +65,7 @@ def generate_code_location(path: str, editable_dagster_root: Optional[str] = Non execute_code_location_command(Path(path), ("uv", "sync")) -def generate_component_type(root_path: str, name: str) -> None: +def generate_component_type(root_path: Path, name: str) -> None: click.echo(f"Creating a Dagster component type at {root_path}/{name}.py.") generate_subtree( @@ -79,7 +79,7 @@ def generate_component_type(root_path: str, name: str) -> None: def generate_component_instance( - root_path: str, + root_path: Path, name: str, component_type: str, json_params: Optional[str], @@ -87,7 +87,7 @@ def generate_component_instance( ) -> None: click.echo(f"Creating a Dagster component instance at {root_path}/{name}.py.") - component_instance_root_path = os.path.join(root_path, name) + component_instance_root_path = root_path / name generate_subtree( path=component_instance_root_path, name_placeholder="COMPONENT_INSTANCE_NAME_PLACEHOLDER", diff --git a/python_modules/libraries/dagster-dg/dagster_dg/utils.py b/python_modules/libraries/dagster-dg/dagster_dg/utils.py index 2452d563fee9c..cef8b663368e3 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/utils.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/utils.py @@ -72,7 +72,7 @@ def snakecase(string: str) -> str: # Copied from dagster._generate.generate def generate_subtree( - path: str, + path: Path, excludes: Optional[List[str]] = None, name_placeholder: str = PROJECT_NAME_PLACEHOLDER, templates_path: str = PROJECT_NAME_PLACEHOLDER, @@ -87,9 +87,7 @@ def generate_subtree( if not os.path.exists(normalized_path): os.mkdir(normalized_path) - project_template_path: str = os.path.join( - os.path.dirname(__file__), "templates", templates_path - ) + project_template_path = os.path.join(os.path.dirname(__file__), "templates", templates_path) loader: jinja2.loaders.FileSystemLoader = jinja2.FileSystemLoader( searchpath=project_template_path ) @@ -99,17 +97,17 @@ def generate_subtree( for root, dirs, files in os.walk(project_template_path): # For each subdirectory in the source template, create a subdirectory in the destination. for dirname in dirs: - src_dir_path: str = os.path.join(root, dirname) + src_dir_path = os.path.join(root, dirname) if _should_skip_file(src_dir_path, excludes): continue - src_relative_dir_path: str = os.path.relpath(src_dir_path, project_template_path) - dst_relative_dir_path: str = src_relative_dir_path.replace( + src_relative_dir_path = os.path.relpath(src_dir_path, project_template_path) + dst_relative_dir_path = src_relative_dir_path.replace( name_placeholder, project_name, 1, ) - dst_dir_path: str = os.path.join(normalized_path, dst_relative_dir_path) + dst_dir_path = os.path.join(normalized_path, dst_relative_dir_path) os.mkdir(dst_dir_path) @@ -119,20 +117,20 @@ def generate_subtree( if _should_skip_file(src_file_path, excludes): continue - src_relative_file_path: str = os.path.relpath(src_file_path, project_template_path) - dst_relative_file_path: str = src_relative_file_path.replace( + src_relative_file_path = os.path.relpath(src_file_path, project_template_path) + dst_relative_file_path = src_relative_file_path.replace( name_placeholder, project_name, 1, ) - dst_file_path: str = os.path.join(normalized_path, dst_relative_file_path) + dst_file_path = os.path.join(normalized_path, dst_relative_file_path) if dst_file_path.endswith(".jinja"): dst_file_path = dst_file_path[: -len(".jinja")] with open(dst_file_path, "w", encoding="utf8") as f: # Jinja template names must use the POSIX path separator "/". - template_name: str = src_relative_file_path.replace(os.sep, posixpath.sep) + template_name = src_relative_file_path.replace(os.sep, posixpath.sep) template: jinja2.environment.Template = env.get_template(name=template_name) f.write( template.render( diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py index 776d33f996bff..8275d839690cd 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py @@ -235,7 +235,7 @@ def test_generate_component_type_outside_code_location_fails() -> None: with isolated_example_deployment_foo(runner): result = runner.invoke(generate_component_type_command, ["baz"]) assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + assert "must be run inside a Dagster code location directory" in result.output @pytest.mark.parametrize("in_deployment", [True, False]) @@ -267,7 +267,7 @@ def test_generate_component_outside_code_location_fails() -> None: with isolated_example_deployment_foo(runner): result = runner.invoke(generate_component_command, ["bar.baz", "qux"]) assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + assert "must be run inside a Dagster code location directory" in result.output @pytest.mark.parametrize("in_deployment", [True, False]) diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py index 630d2ae2fa95b..f48f2e7d4ce6d 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_list_commands.py @@ -43,7 +43,7 @@ def test_list_code_locations_outside_deployment_fails() -> None: with runner.isolated_filesystem(): result = runner.invoke(list_code_locations_command) assert result.exit_code != 0 - assert "must be run inside a Dagster deployment project" in result.output + assert "must be run inside a Dagster deployment directory" in result.output def test_list_component_types_success(): @@ -69,7 +69,7 @@ def test_list_component_types_outside_code_location_fails() -> None: with runner.isolated_filesystem(): result = runner.invoke(list_component_types_command) assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + assert "must be run inside a Dagster code location directory" in result.output def test_list_components_succeeds(): @@ -87,4 +87,4 @@ def test_list_components_command_outside_code_location_fails() -> None: with runner.isolated_filesystem(): result = runner.invoke(list_components_command) assert result.exit_code != 0 - assert "must be run inside a Dagster code location project" in result.output + assert "must be run inside a Dagster code location directory" in result.output