Skip to content

Commit

Permalink
fix regression: all missing columns should be reported (#1117)
Browse files Browse the repository at this point in the history
Signed-off-by: Niels Bantilan <[email protected]>
  • Loading branch information
cosmicBboy authored Mar 15, 2023
1 parent 93c7880 commit eb6b035
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 12 deletions.
37 changes: 25 additions & 12 deletions pandera/backends/pandas/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,12 @@ def validate(

try:
self.check_column_presence(check_obj, schema, column_info)
except SchemaError as exc:
error_handler.collect_error(exc.reason_code, exc)
except SchemaErrors as exc:
for schema_error in exc.schema_errors:
error_handler.collect_error(
schema_error["reason_code"],
schema_error["error"],
)

# strictness check and filter
try:
Expand Down Expand Up @@ -471,18 +475,27 @@ def check_column_presence(
):
"""Check for presence of specified columns in the data object."""
if column_info.absent_column_names:
# NOTE: only report the first absent column for now, need to update
# this when backend stuff is complete
colname, *_ = column_info.absent_column_names
raise SchemaError(
reason_code = "column_not_in_dataframe"
raise SchemaErrors(
schema=schema,
schema_errors=[
{
"reason_code": reason_code,
"error": SchemaError(
schema=schema,
data=check_obj,
message=(
f"column '{colname}' not in dataframe"
f"\n{check_obj.head()}"
),
failure_cases=scalar_failure_case(colname),
check="column_in_dataframe",
reason_code=reason_code,
),
}
for colname in column_info.absent_column_names
],
data=check_obj,
message=(
f"column '{colname}' not in dataframe\n{check_obj.head()}"
),
failure_cases=scalar_failure_case(colname),
check="column_in_dataframe",
reason_code="column_not_in_dataframe",
)

def check_column_values_are_unique(self, check_obj: pd.DataFrame, schema):
Expand Down
21 changes: 21 additions & 0 deletions tests/core/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1965,3 +1965,24 @@ def test_column_set_unique():
assert not test_schema.columns["a"].unique
test_schema = test_schema.update_column("a", unique=True)
assert test_schema.columns["a"].unique


def test_missing_columns():
    """Test that multiple missing columns are all reported.

    Validates a dataframe that contains neither of the two schema columns
    with ``lazy=True`` and checks that the collected failure cases list
    every absent column under the ``column_in_dataframe`` check.
    """
    schema = DataFrameSchema(
        {
            "column3": Column(int),
            "column2": Column(float),
        }
    )

    # The dataframe has none of the columns declared in the schema.
    df = pd.DataFrame({"column1": [1]})

    try:
        schema.validate(df, lazy=True)
    except errors.SchemaErrors as exc:
        assert (exc.failure_cases["check"] == "column_in_dataframe").all()
        assert exc.failure_cases["failure_case"].tolist() == [
            "column3",
            "column2",
        ]
    else:
        # Without this branch the test would pass silently if validation
        # stopped raising, hiding the very regression it guards against.
        raise AssertionError(
            "expected SchemaErrors for the missing columns, "
            "but validation succeeded"
        )

0 comments on commit eb6b035

Please sign in to comment.