Skip to content

Commit

Permalink
[dg] Nicely format YAML parsing errors in dg check CLI (#27528)
Browse files Browse the repository at this point in the history
## Summary

Slurp up `yaml.scanner.ScannerErrors` and pass them into our nice error
message formatter, that way `dg check` calls don't early-exit if we hit
a parsing error.

<img width="993" alt="Screenshot 2025-02-03 at 3 20 58 PM"
src="https://github.com/user-attachments/assets/202b316c-aa23-423e-b1bf-b4ad6afb4652"
/>


## Test Plan

New unit tests.
  • Loading branch information
benpankow authored Feb 4, 2025
1 parent b5dd461 commit 230a2fd
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 14 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ check_prettier:
prettier `git ls-files \
'python_modules/*.yml' 'python_modules/*.yaml' 'helm/*.yml' 'helm/*.yaml' \
':!:helm/**/templates/*.yml' ':!:helm/**/templates/*.yaml' '*.md' ':!:docs/*.md' \
':!:python_modules/libraries/dagster-components/dagster_components_tests/integration_tests/components/validation/**/*.yaml' \
':!:README.md'` --check

prettier:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,32 @@ def _validate_error_msg(msg: str) -> None:
"'asdfasdf' is not of type 'object'",
),
),
ComponentValidationTestCase(
component_path="validation/invalid_yaml_missing_quote",
component_type_filepath=None,
should_error=True,
validate_error_msg=msg_includes_all_of(
"line 2",
"found unexpected end of stream",
),
check_error_msg=msg_includes_all_of(
"component.yaml:2",
"Unable to parse YAML",
"found unexpected end of stream",
),
),
ComponentValidationTestCase(
component_path="validation/invalid_yaml_invalid_char",
component_type_filepath=None,
should_error=True,
validate_error_msg=msg_includes_all_of(
"line 1",
"found character '@' that cannot start any token",
),
check_error_msg=msg_includes_all_of(
"component.yaml:1",
"Unable to parse YAML",
"found character '@' that cannot start any token",
),
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
type: @"bad char"

params: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
type: "no close quote
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
)
from dagster_components.utils import ensure_dagster_components_tests_import
from pydantic import ValidationError
from yaml.scanner import ScannerError

from dagster_components_tests.integration_tests.validation_tests.utils import (
load_test_component_defs_inject_component,
Expand All @@ -23,7 +24,7 @@ def test_validation_messages(test_case: ComponentValidationTestCase) -> None:
errors.
"""
if test_case.should_error:
with pytest.raises(ValidationError) as e:
with pytest.raises((ValidationError, ScannerError)) as e:
load_test_component_defs_inject_component(
str(test_case.component_path),
test_case.component_type_filepath,
Expand Down
62 changes: 49 additions & 13 deletions python_modules/libraries/dagster-dg/dagster_dg/cli/component.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from collections.abc import Mapping, Sequence
from copy import copy
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional
from typing import Any, NamedTuple, Optional

import click
from click.core import ParameterSource
from jsonschema import Draft202012Validator, ValidationError
from typer.rich_utils import rich_format_help
from yaml.scanner import ScannerError

from dagster_dg.cli.check_utils import error_dict_to_formatted_error
from dagster_dg.cli.global_options import GLOBAL_OPTIONS, dg_global_options
Expand All @@ -28,9 +29,12 @@
parse_json_option,
)
from dagster_dg.yaml_utils import parse_yaml_with_source_positions

if TYPE_CHECKING:
from dagster_dg.yaml_utils.source_position import ValueAndSourcePositionTree
from dagster_dg.yaml_utils.source_position import (
LineCol,
SourcePosition,
SourcePositionTree,
ValueAndSourcePositionTree,
)


@click.group(name="component", cls=DgClickGroup)
Expand Down Expand Up @@ -265,6 +269,26 @@ def _is_local_component(component_name: str) -> bool:
return component_name.startswith(".")


def _scaffold_value_and_source_position_tree(
filename: str, row: int, col: int
) -> ValueAndSourcePositionTree:
return ValueAndSourcePositionTree(
value=None,
source_position_tree=SourcePositionTree(
position=SourcePosition(
filename=filename, start=LineCol(row, col), end=LineCol(row, col)
),
children={},
),
)


class ErrorInput(NamedTuple):
component_name: Optional[str]
error: ValidationError
source_position_tree: ValueAndSourcePositionTree


@component_group.command(name="check", cls=DgClickCommand)
@click.argument("paths", nargs=-1, type=click.Path(exists=True))
@dg_global_options
Expand All @@ -281,7 +305,7 @@ def component_check_command(
cli_config = normalize_cli_config(global_options, context)
dg_context = DgContext.for_code_location_environment(Path.cwd(), cli_config)

validation_errors: list[tuple[Optional[str], ValidationError, ValueAndSourcePositionTree]] = []
validation_errors: list[ErrorInput] = []

component_contents_by_dir = {}
local_component_dirs = set()
Expand All @@ -295,17 +319,30 @@ def component_check_command(

if component_path.exists():
text = component_path.read_text()
component_doc_tree = parse_yaml_with_source_positions(
text, filename=str(component_path)
)

try:
component_doc_tree = parse_yaml_with_source_positions(
text, filename=str(component_path)
)
except ScannerError as se:
validation_errors.append(
ErrorInput(
None,
ValidationError(f"Unable to parse YAML: {se.context}, {se.problem}"),
_scaffold_value_and_source_position_tree(
filename=str(component_path),
row=se.problem_mark.line + 1 if se.problem_mark else 1,
col=se.problem_mark.column + 1 if se.problem_mark else 1,
),
)
)
continue
# First, validate the top-level structure of the component file
# (type and params keys) before we try to validate the params themselves.
top_level_errs = list(
top_level_component_validator.iter_errors(component_doc_tree.value)
)
for err in top_level_errs:
validation_errors.append((None, err, component_doc_tree))
validation_errors.append(ErrorInput(None, err, component_doc_tree))
if top_level_errs:
continue

Expand All @@ -329,11 +366,11 @@ def component_check_command(

v = Draft202012Validator(json_schema)
for err in v.iter_errors(component_doc_tree.value["params"]):
validation_errors.append((component_name, err, component_doc_tree))
validation_errors.append(ErrorInput(component_name, err, component_doc_tree))
except KeyError:
# No matching component type found
validation_errors.append(
(
ErrorInput(
None,
ValidationError(
f"Unable to locate local component type '{component_name}' in {component_dir}."
Expand All @@ -343,7 +380,6 @@ def component_check_command(
component_doc_tree,
)
)

if validation_errors:
for component_name, error, component_doc_tree in validation_errors:
click.echo(
Expand Down

0 comments on commit 230a2fd

Please sign in to comment.