Skip to content

Commit

Permalink
feat(cli): add dagster project scaffold --excludes foo option
Browse files Browse the repository at this point in the history
Fixed flaky tests by refactoring `os.path` to `pathlib.Path`.
  • Loading branch information
dbrtly committed Oct 27, 2024
1 parent bdcf8f9 commit 8178a4e
Show file tree
Hide file tree
Showing 16 changed files with 199 additions and 206 deletions.
99 changes: 49 additions & 50 deletions python_modules/dagster/dagster/_cli/project.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import sys
from typing import NamedTuple, Optional, Sequence, Tuple, Union
from typing import NamedTuple, Optional, Sequence

import click
import requests
Expand All @@ -20,14 +20,14 @@ def project_cli():
FLAGGED_PACKAGE_KEYWORDS = ["dagster", "dbt"]

scaffold_repository_command_help_text = (
"(DEPRECATED; Use `dagster project scaffold-code-location` instead) "
"(DEPRECATED; Use `dagster project scaffold --excludes readme` instead) "
"Create a folder structure with a single Dagster repository, in the current directory. "
"This CLI helps you to scaffold a new Dagster repository within a folder structure that "
"includes multiple Dagster repositories"
)

scaffold_code_location_command_help_text = (
"(DEPRECATED; Use `dagster project scaffold --excludes README.md` instead) "
"(DEPRECATED; Use `dagster project scaffold --excludes readme` instead) "
"Create a folder structure with a single Dagster code location, in the current directory. "
"This CLI helps you to scaffold a new Dagster code location within a folder structure that "
"includes multiple Dagster code locations."
Expand All @@ -52,7 +52,7 @@ class PackageConflictCheckResult(NamedTuple):
conflict_exists: bool = False


def check_if_pypi_package_conflict_exists(project_name: str) -> PackageConflictCheckResult:
def _get_pypi_package_conflict_result(project_name: str) -> PackageConflictCheckResult:
"""Checks if the project name contains any flagged keywords. If so, raises a warning if a PyPI
package with the same name exists. This is to prevent import errors from occurring due to a
project name that conflicts with an imported package.
Expand All @@ -71,7 +71,35 @@ def check_if_pypi_package_conflict_exists(project_name: str) -> PackageConflictC
return PackageConflictCheckResult(request_error_msg=None, conflict_exists=False)


# start deprecated commands
def check_pypi_package_conflict(project_name: str) -> None:
package_check_result: PackageConflictCheckResult = _get_pypi_package_conflict_result(
project_name
)
if package_check_result.request_error_msg:
click.echo(
click.style(
"An error occurred while checking for package conflicts:"
f" {package_check_result.request_error_msg}. \n\nConflicting package names will"
" cause import errors in your project if the existing PyPI package is included"
" as a dependency in your scaffolded project. If desired, this check can be"
" skipped by adding the `--ignore-package-conflict` flag.",
fg="red",
)
)
sys.exit(1)

if package_check_result.conflict_exists:
click.echo(
click.style(
f"The project '{project_name}' conflicts with an existing PyPI package."
" Conflicting package names will cause import errors in your project if the"
" existing PyPI package is included as a dependency in your scaffolded"
" project. Please choose another name, or add the `--ignore-package-conflict`"
" flag to bypass this check.",
fg="yellow",
)
)
sys.exit(1)


@project_cli.command(
Expand All @@ -85,7 +113,7 @@ def check_if_pypi_package_conflict_exists(project_name: str) -> PackageConflictC
type=click.STRING,
help="Name of the new Dagster repository",
)
def scaffold_repository_command(name: str):
def scaffold_repository_command(name: str) -> None:
dir_abspath = os.path.abspath(name)
if os.path.isdir(dir_abspath) and os.path.exists(dir_abspath):
click.echo(
Expand All @@ -96,7 +124,7 @@ def scaffold_repository_command(name: str):

click.echo(
click.style(
"WARNING: This command is deprecated. Use `dagster project scaffold` instead.",
"WARNING: command is deprecated. Use `dagster project scaffold --excludes readme` instead.",
fg="yellow",
)
)
Expand All @@ -119,45 +147,13 @@ def scaffold_repository_command(name: str):
def scaffold_code_location_command(context, name: str):
click.echo(
click.style(
"WARNING: This command is deprecated. Use `dagster project scaffold --excludes README.md` instead.",
"WARNING: command is deprecated. Use `dagster project scaffold --excludes readme` instead.",
fg="yellow",
)
)
context.invoke(scaffold_command, name=name, excludes=["README.md"])


# end deprecated commands


def _check_and_error_on_package_conflicts(project_name: str) -> None:
package_check_result = check_if_pypi_package_conflict_exists(project_name)
if package_check_result.request_error_msg:
click.echo(
click.style(
"An error occurred while checking for package conflicts:"
f" {package_check_result.request_error_msg}. \n\nConflicting package names will"
" cause import errors in your project if the existing PyPI package is included"
" as a dependency in your scaffolded project. If desired, this check can be"
" skipped by adding the `--ignore-package-conflict` flag.",
fg="red",
)
)
sys.exit(1)

if package_check_result.conflict_exists:
click.echo(
click.style(
f"The project '{project_name}' conflicts with an existing PyPI package."
" Conflicting package names will cause import errors in your project if the"
" existing PyPI package is included as a dependency in your scaffolded"
" project. Please choose another name, or add the `--ignore-package-conflict`"
" flag to bypass this check.",
fg="yellow",
)
)
sys.exit(1)


@project_cli.command(
name="scaffold",
short_help=scaffold_command_help_text,
Expand All @@ -174,7 +170,7 @@ def _check_and_error_on_package_conflicts(project_name: str) -> None:
multiple=True,
type=click.STRING,
default=[],
help="Exclude file patterns from the project template",
help="Exclude case-insensitive file patterns from the project template",
)
@click.option(
"--ignore-package-conflict",
Expand All @@ -183,9 +179,12 @@ def _check_and_error_on_package_conflicts(project_name: str) -> None:
help="Controls whether the project name can conflict with an existing PyPI package.",
)
def scaffold_command(
name: str, excludes: Union[Tuple, list], ignore_package_conflict: bool = False
):
excludes = list(excludes)
name: str, excludes: list[str] | tuple | None = None, ignore_package_conflict: bool = False
) -> None:
excludes = [] if not excludes else excludes
if isinstance(excludes, tuple):
excludes = list(excludes)

dir_abspath = os.path.abspath(name)
if os.path.isdir(dir_abspath) and os.path.exists(dir_abspath):
click.echo(
Expand All @@ -195,9 +194,9 @@ def scaffold_command(
sys.exit(1)

if not ignore_package_conflict:
_check_and_error_on_package_conflicts(name)
check_pypi_package_conflict(name)

generate_project(dir_abspath, excludes)
generate_project(dir_abspath, excludes=excludes)
click.echo(_styled_success_statement(name, dir_abspath))


Expand Down Expand Up @@ -228,7 +227,7 @@ def scaffold_command(
default=dagster_version,
show_default=True,
)
def from_example_command(name: Optional[str], example: str, version: str):
def from_example_command(name: Optional[str], example: str, version: str) -> None:
name = name or example
dir_abspath = os.path.abspath(name) + "/"
if os.path.isdir(dir_abspath) and os.path.exists(dir_abspath):
Expand All @@ -238,7 +237,7 @@ def from_example_command(name: Optional[str], example: str, version: str):
)
sys.exit(1)
else:
os.mkdir(dir_abspath)
os.makedirs(dir_abspath, exist_ok=True)

download_example_from_github(dir_abspath, example, version)

Expand All @@ -250,7 +249,7 @@ def from_example_command(name: Optional[str], example: str, version: str):
short_help=list_examples_command_help_text,
help=list_examples_command_help_text,
)
def from_example_list_command():
def from_example_list_command() -> None:
click.echo("Examples available in `dagster project from-example`:")

click.echo(_styled_list_examples_prints(AVAILABLE_EXAMPLES))
Expand All @@ -260,7 +259,7 @@ def _styled_list_examples_prints(examples: Sequence[str]) -> str:
return "\n".join([f"* {name}" for name in examples])


def _styled_success_statement(name: str, path: str):
def _styled_success_statement(name: str, path: str) -> None:
return (
click.style("Success!", fg="green")
+ " Created "
Expand Down
6 changes: 4 additions & 2 deletions python_modules/dagster/dagster/_generate/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
# Examples that can't be downloaded from the dagster project CLI
EXAMPLES_TO_IGNORE = [
"deploy_k8s_beta",
"docs_snippets",
"docs_beta_snippets",
"docs_snippets",
"experimental",
"temp_pins.txt",
"use_case_repository",
"pyproject.toml",
"README.md",
"temp_pins.txt",
]
# Hardcoded list of available examples. The list is tested against the examples folder in this mono
# repo to make sure it's up-to-date.
Expand Down
98 changes: 43 additions & 55 deletions python_modules/dagster/dagster/_generate/generate.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import os
import posixpath
from typing import List, Optional
from typing import List

import click
import jinja2

from dagster.version import __version__ as dagster_version

IGNORE_PATTERN_LIST: List[str] = [
DEFAULT_EXCLUDES: List[str] = [
"__pycache__",
".pytest_cache",
"*.egg-info",
".DS_Store",
".ruff_cache",
"tox.ini",
]

Expand All @@ -24,111 +25,98 @@ def generate_repository(path: str):
click.echo(f"Creating a Dagster repository at {path}.")

# Render templates for Dagster repository
_render_templates(
generate_project(
path=path,
excludes=None,
name_placeholder=REPO_NAME_PLACEHOLDER,
project_template_path=os.path.join(
os.path.dirname(__file__), "templates", REPO_NAME_PLACEHOLDER
),
templates_path=os.path.join(os.path.dirname(__file__), "templates", REPO_NAME_PLACEHOLDER),
)

click.echo(f"Generated files for Dagster repository in {path}.")


def generate_project(path: str, excludes: Optional[List[str]] = None):
if not excludes:
excludes = []
def generate_project(
path: str,
excludes: List[str] | None = None,
name_placeholder: str = PROJECT_NAME_PLACEHOLDER,
templates_path: str = PROJECT_NAME_PLACEHOLDER,
):
excludes: list[str] = DEFAULT_EXCLUDES if not excludes else DEFAULT_EXCLUDES + excludes

click.echo(f"Creating a Dagster project at {path}.")

# Step 1: Render templates for Dagster project
_render_templates(
path=path,
name_placeholder=PROJECT_NAME_PLACEHOLDER,
project_template_path=os.path.join(
os.path.dirname(__file__), "templates", PROJECT_NAME_PLACEHOLDER
),
skip_mkdir=True,
excludes=excludes,
)

click.echo(f"Generated files for Dagster project in {path}.")


def _render_templates(
path: str,
name_placeholder: str,
project_template_path: str,
skip_mkdir: bool = False,
excludes: List[str] = [],
):
normalized_path = os.path.normpath(path)
code_location_name = os.path.basename(normalized_path).replace("-", "_")
project_name: str = os.path.basename(normalized_path).replace("-", "_")
os.mkdir(normalized_path)

if not skip_mkdir: # skip if the dir is created by previous command
os.mkdir(normalized_path)

loader = jinja2.FileSystemLoader(searchpath=project_template_path)
env = jinja2.Environment(loader=loader)
project_template_path: str = os.path.join(
os.path.dirname(__file__), "templates", templates_path
)
loader: jinja2.loaders.FileSystemLoader = jinja2.FileSystemLoader(
searchpath=project_template_path
)
env: jinja2.environment.Environment = jinja2.Environment(loader=loader)

# merge custom skip_files with the default list
excludes = IGNORE_PATTERN_LIST + excludes
for root, dirs, files in os.walk(project_template_path):
# For each subdirectory in the source template, create a subdirectory in the destination.
for dirname in dirs:
src_dir_path = os.path.join(root, dirname)
src_dir_path: str = os.path.join(root, dirname)
if _should_skip_file(src_dir_path, excludes):
continue

src_relative_dir_path = os.path.relpath(src_dir_path, project_template_path)
dst_relative_dir_path = src_relative_dir_path.replace(
src_relative_dir_path: str = os.path.relpath(src_dir_path, project_template_path)
dst_relative_dir_path: str = src_relative_dir_path.replace(
name_placeholder,
code_location_name,
project_name,
1,
)
dst_dir_path = os.path.join(normalized_path, dst_relative_dir_path)
dst_dir_path: str = os.path.join(normalized_path, dst_relative_dir_path)

os.mkdir(dst_dir_path)

# For each file in the source template, render a file in the destination.
for filename in files:
src_file_path = os.path.join(root, filename)
src_file_path: posixpath = os.path.join(root, filename)
if _should_skip_file(src_file_path, excludes):
continue

src_relative_file_path = os.path.relpath(src_file_path, project_template_path)
dst_relative_file_path = src_relative_file_path.replace(
src_relative_file_path: str = os.path.relpath(src_file_path, project_template_path)
dst_relative_file_path: str = src_relative_file_path.replace(
name_placeholder,
code_location_name,
project_name,
1,
)
dst_file_path = os.path.join(normalized_path, dst_relative_file_path)
dst_file_path: str = os.path.join(normalized_path, dst_relative_file_path)

if dst_file_path.endswith(".tmpl"):
dst_file_path = dst_file_path[: -len(".tmpl")]
if dst_file_path.endswith(".jinja"):
dst_file_path = dst_file_path[: -len(".jinja")]

with open(dst_file_path, "w", encoding="utf8") as f:
# Jinja template names must use the POSIX path separator "/".
template_name = src_relative_file_path.replace(os.sep, posixpath.sep)
template = env.get_template(name=template_name)
template_name: str = src_relative_file_path.replace(os.sep, posixpath.sep)
template: jinja2.environment.Template = env.get_template(name=template_name)
f.write(
template.render(
repo_name=code_location_name, # deprecated
code_location_name=code_location_name,
repo_name=project_name, # deprecated
code_location_name=project_name,
dagster_version=dagster_version,
project_name=project_name,
)
)
f.write("\n")

click.echo(f"Generated files for Dagster project in {path}.")


def _should_skip_file(path: str, excludes: List[str] = IGNORE_PATTERN_LIST):
def _should_skip_file(path: str, excludes: List[str] = DEFAULT_EXCLUDES):
"""Given a file path `path` in a source template, returns whether or not the file should be skipped
when generating destination files.
Technically, `path` could also be a directory path that should be skipped.
"""
for pattern in excludes:
if pattern in path:
if pattern.lower() in path.lower():
return True

return False
Loading

0 comments on commit 8178a4e

Please sign in to comment.