Skip to content

BUG: Fix implicit conversion to float64 with isin() #61679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/reference/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,7 @@ Data type introspection
api.types.is_dtype_equal
api.types.is_extension_array_dtype
api.types.is_float_dtype
api.types.is_implicit_conversion_to_float64
api.types.is_int64_dtype
api.types.is_integer_dtype
api.types.is_interval_dtype
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ Other API changes
- Index set operations (like union or intersection) will now ignore the dtype of
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
the dtype of the resulting Index (:issue:`60797`)
- Added :func:`pandas.api.types.is_implicit_conversion_to_float64` to check whether combining two dtypes results in a silent conversion to float64 (:issue:`61676`)

.. ---------------------------------------------------------------------------
.. _whatsnew_300.deprecations:
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,14 @@
is_bool_dtype,
is_complex_dtype,
is_dict_like,
is_dtype_equal,
is_extension_array_dtype,
is_float,
is_float_dtype,
is_implicit_conversion_to_float64,
is_integer,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_signed_integer_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
Expand Down Expand Up @@ -511,8 +510,7 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
if (
len(values) > 0
and values.dtype.kind in "iufcb"
and not is_signed_integer_dtype(comps)
and not is_dtype_equal(values, comps)
and is_implicit_conversion_to_float64(values, comps)
):
# GH#46485 Use object to avoid upcast to float64 later
# TODO: Share with _find_common_type_compat
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/dtypes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
is_float,
is_float_dtype,
is_hashable,
is_implicit_conversion_to_float64,
is_int64_dtype,
is_integer,
is_integer_dtype,
Expand Down Expand Up @@ -59,6 +60,7 @@
"is_float",
"is_float_dtype",
"is_hashable",
"is_implicit_conversion_to_float64",
"is_int64_dtype",
"is_integer",
"is_integer_dtype",
Expand Down
75 changes: 75 additions & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,80 @@ def is_dtype_equal(source, target) -> bool:
return False


def is_implicit_conversion_to_float64(source, target) -> bool:
"""
Check if there is an implicit conversion to float64 with both dtypes.

Parameters
----------
source : type or str
The first dtype to compare.
target : type or str
The second dtype to compare.

Returns
-------
boolean
Whether or not there is an implicit conversion to float64.

See Also
--------
api.types.is_categorical_dtype : Check whether the provided array or dtype
is of the Categorical dtype.
api.types.is_string_dtype : Check whether the provided array or dtype
is of the string dtype.
api.types.is_object_dtype : Check whether an array-like or dtype is of the
object dtype.

Examples
--------
>>> from pandas.api.types import is_implicit_conversion_to_float64
>>> is_implicit_conversion_to_float64(int, float)
False
>>> is_implicit_conversion_to_float64("int", int)
False
>>> is_implicit_conversion_to_float64(int, np.int64)
False
>>> is_implicit_conversion_to_float64(np.uint64, np.int64)
True
>>> is_implicit_conversion_to_float64(np.uint64, np.float64)
False
>>> is_implicit_conversion_to_float64(np.uint64, np.uint64)
False
>>> is_implicit_conversion_to_float64(np.uint32, np.uint32)
False
>>> is_implicit_conversion_to_float64(np.uint32, np.int32)
False
>>> is_implicit_conversion_to_float64(np.int32, np.int32)
False
>>> is_implicit_conversion_to_float64(object, "category")
False
>>> is_implicit_conversion_to_float64(np.int64, pd.UInt64Dtype())
True
>>> from pandas.core.dtypes.dtypes import CategoricalDtype
>>> is_implicit_conversion_to_float64(CategoricalDtype(), "category")
False
"""
try:
src = _get_dtype(source)
tar = _get_dtype(target)
# check only valid dtypes related to implicit conversion to float64
# other data types derived from 64-bit integers such as U/Int64Dtype
# should also work
if (
src.kind in "iu"
and src.itemsize == 8 # type: ignore[union-attr]
and tar.kind in "iu"
and tar.itemsize == 8 # type: ignore[union-attr]
):
return src != tar
else:
return False
except (TypeError, AttributeError, ImportError):
# invalid comparison
return False


def is_integer_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of an integer dtype.
Expand Down Expand Up @@ -1934,6 +2008,7 @@ def is_all_strings(value: ArrayLike) -> bool:
"is_extension_array_dtype",
"is_file_like",
"is_float_dtype",
"is_implicit_conversion_to_float64",
"is_int64_dtype",
"is_integer_dtype",
"is_interval_dtype",
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ class TestApi(Base):
"is_float",
"is_float_dtype",
"is_hashable",
"is_implicit_conversion_to_float64",
"is_int64_dtype",
"is_integer",
"is_integer_dtype",
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/api/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class TestTypes(Base):
"is_dtype_equal",
"is_float",
"is_float_dtype",
"is_implicit_conversion_to_float64",
"is_int64_dtype",
"is_integer",
"is_integer_dtype",
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,13 @@ def test_isin_unsigned_dtype(self):
expected = Series(False)
tm.assert_series_equal(result, expected)

def test_isin_unsigned_dtype_other_side(self):
    # GH#46485
    # Signed int64 data is checked against an unsigned uint64 candidate.
    # The two integers differ by only 86, less than the float64 spacing
    # (256) at this magnitude, so a comparison routed through float64 could
    # conflate them; the lookup must still report "not a member".
    signed = Series([1378774140726870442], dtype=np.int64)
    candidates = [np.uint64(1378774140726870528)]
    tm.assert_series_equal(signed.isin(candidates), Series(False))


class TestValueCounts:
def test_value_counts(self):
Expand Down
Loading