Skip to content

Commit

Permalink
bugfix: preserve original index values in failure case formatting (#121)
Browse files: browse the repository at this point in the history
  • Loading branch information
cosmicBboy authored Nov 11, 2019
1 parent d915e00 commit 9dd4610
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ docs:
mock-ci-tests:
. ./ci_tests.sh

conda-build: conda-build-35 conda-build-36 conda-build-37

conda-build-35:
conda-build --python=3.5 conda.recipe

Expand Down
16 changes: 8 additions & 8 deletions pandera/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,12 @@ def _vectorized_error_message(
parent_schema: type,
check_index: int,
failure_cases: Union[pd.DataFrame, pd.Series]) -> str:
"""Construct an error message when an element-wise validator fails.
"""Construct an error message when a validator fails.
:param parent_schema: class of schema being validated.
:param check_index: The validator that failed.
:param failure_cases: The failure cases encountered by the element-wise
validator.
or vectorized validator.
"""
return (
Expand Down Expand Up @@ -198,12 +198,11 @@ def _format_failure_cases(
representing how many failures of that case occurred.
"""
# reset index so that index is just 0-indexed integers
if hasattr(failure_cases, "index") and \
isinstance(failure_cases.index, pd.MultiIndex):
index_name = failure_cases.index.name
failure_cases = (
failure_cases
.reset_index(drop=True)
.rename("failure_case")
.reset_index()
.assign(
Expand All @@ -213,18 +212,18 @@ def _format_failure_cases(
)
)
elif isinstance(failure_cases, pd.DataFrame):
index_name = failure_cases.index.name
failure_cases = (
failure_cases
.reset_index(drop=True)
.pipe(lambda df: pd.Series(
df.itertuples()).map(lambda x: x.__repr__()))
.rename("failure_case")
.reset_index()
)
elif isinstance(failure_cases, pd.Series):
index_name = failure_cases.index.name
failure_cases = (
failure_cases
.reset_index(drop=True)
.rename("failure_case")
.reset_index()
)
Expand All @@ -233,10 +232,11 @@ def _format_failure_cases(
"type of failure_cases argument not understood: %s" %
type(failure_cases))

index_name = "index" if index_name is None else index_name
failure_cases = (
failure_cases
.groupby("failure_case").index.agg([list, len])
.rename(columns={"list": "index", "len": "count"})
.groupby("failure_case")[index_name].agg([list, len])
.rename(columns={"list": index_name, "len": "count"})
.sort_values("count", ascending=False)
)

Expand Down

0 comments on commit 9dd4610

Please sign in to comment.