Skip to content

Commit

Permalink
fix: allow empty str/NaN to be converted to bools (#323)
Browse files Browse the repository at this point in the history
  • Loading branch information
bastienboutonnet authored Jan 17, 2021
1 parent 592b489 commit 31a92e1
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 28 deletions.
1 change: 1 addition & 0 deletions changelog/323.fix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Empty strings in columns to be converted to booleans were turned into ``np.nan`` after sheet ingestion. Because this value had not been added to the allowable values for boolean conversion, users who had null values in their sheets could have run into the app raising an error and refusing to convert. Given that it is perfectly fine to have ``null`` values among booleans, this is now made possible.
50 changes: 25 additions & 25 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion sheetwork/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from urllib.error import URLError

import luddite
import numpy as np
import pandas
from packaging.version import parse as semver_parse

Expand Down Expand Up @@ -243,7 +244,7 @@ def handle_booleans(df: pandas.DataFrame, overwrite_dict: Dict[str, str]) -> pan
Python booleans.
"""
df = df.copy()
boolean_map_dict = {"true": True, "false": False}
boolean_map_dict = {"true": True, "false": False, np.nan: pandas.NA}
for column, data_type in overwrite_dict.items():

if data_type == "boolean" and df[column].dtypes == "object":
Expand All @@ -256,6 +257,7 @@ def handle_booleans(df: pandas.DataFrame, overwrite_dict: Dict[str, str]) -> pan
unique_boolean_values = df[column].unique().tolist()
if set(unique_boolean_values).issubset(boolean_map_dict.keys()):
df[column] = df[column].map(boolean_map_dict)

else:
raise ColumnNotBooleanCompatibleError(
f"The following values in {column} cannot be turned into booleans: "
Expand Down
2 changes: 2 additions & 0 deletions tests/sheets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ sheets:
datatype: date
- name: camel_cased_col
datatype: int
- name: bool_col
datatype: boolean
excluded_columns: ["to_exclude", "col_not_in_df_for_fun"]

- sheet_name: test_sheet_2
Expand Down
10 changes: 8 additions & 2 deletions tests/utils_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from pathlib import Path
from sheetwork.core.main import handle

import pytest
import numpy as np
import pandas
from pandas.testing import assert_frame_equal
from .mockers import CAST_DF, TO_CAST_DF, generate_test_df

Expand Down Expand Up @@ -95,12 +96,17 @@ def test_handle_booleans(has_good_booleans):
from sheetwork.core.utils import handle_booleans
from sheetwork.core.exceptions import ColumnNotBooleanCompatibleError

expectation = generate_test_df(
{"col_a": [False, True, pandas.NA], "col_b": [True, False, False]}
)
illegal_booleans_df = {"col_a": [False, "True"], "col_b": ["bad", "food"]}
good_booleans_df = {"col_a": [False, True], "col_b": [True, "False"]}
good_booleans_df = {"col_a": [False, True, np.nan], "col_b": [True, "False", False]}
col_casting_dict = {"col_a": "boolean", "col_b": "boolean"}
if has_good_booleans:
df = generate_test_df(good_booleans_df)
df = handle_booleans(df, col_casting_dict)
assert df["col_a"].values.tolist() == expectation["col_a"].values.tolist()
assert df["col_b"].values.tolist() == expectation["col_b"].values.tolist()
else:
with pytest.raises(ColumnNotBooleanCompatibleError):
df = generate_test_df(illegal_booleans_df)
Expand Down

0 comments on commit 31a92e1

Please sign in to comment.