diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index d37eebef5c0c0..fd3bbff1843a3 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -667,6 +667,7 @@ Data type introspection api.types.is_dtype_equal api.types.is_extension_array_dtype api.types.is_float_dtype + api.types.is_implicit_conversion_to_float64 api.types.is_int64_dtype api.types.is_integer_dtype api.types.is_interval_dtype diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5c53267158eab..f3cbe9126a443 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -404,6 +404,7 @@ Other API changes - Index set operations (like union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining the dtype of the resulting Index (:issue:`60797`) +- Added :func:`pandas.api.types.is_implicit_conversion_to_float64` to check if there is a silent conversion to float64 between two dtypes (:issue:`61676`) .. --------------------------------------------------------------------------- .. 
_whatsnew_300.deprecations: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7fc391d3ffb51..d3b276e55cd42 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -47,15 +47,14 @@ is_bool_dtype, is_complex_dtype, is_dict_like, - is_dtype_equal, is_extension_array_dtype, is_float, is_float_dtype, + is_implicit_conversion_to_float64, is_integer, is_integer_dtype, is_list_like, is_object_dtype, - is_signed_integer_dtype, needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat @@ -511,8 +510,7 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: if ( len(values) > 0 and values.dtype.kind in "iufcb" - and not is_signed_integer_dtype(comps) - and not is_dtype_equal(values, comps) + and is_implicit_conversion_to_float64(values, comps) ): # GH#46485 Use object to avoid upcast to float64 later # TODO: Share with _find_common_type_compat diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index e66104d6afcd9..3f504228b15e7 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -17,6 +17,7 @@ is_float, is_float_dtype, is_hashable, + is_implicit_conversion_to_float64, is_int64_dtype, is_integer, is_integer_dtype, @@ -59,6 +60,7 @@ "is_float", "is_float_dtype", "is_hashable", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 68d99937f728c..9bc3daa151bcb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -713,6 +713,80 @@ def is_dtype_equal(source, target) -> bool: return False +def is_implicit_conversion_to_float64(source, target) -> bool: + """ + Check if there is an implicit conversion to float64 with both dtypes. + + Parameters + ---------- + source : type or str + The first dtype to compare. + target : type or str + The second dtype to compare. 
+ + Returns + ------- + boolean + Whether or not there is an implicit conversion to float64. + + See Also + -------- + api.types.is_categorical_dtype : Check whether the provided array or dtype + is of the Categorical dtype. + api.types.is_string_dtype : Check whether the provided array or dtype + is of the string dtype. + api.types.is_object_dtype : Check whether an array-like or dtype is of the + object dtype. + + Examples + -------- + >>> from pandas.api.types import is_implicit_conversion_to_float64 + >>> is_implicit_conversion_to_float64(int, float) + False + >>> is_implicit_conversion_to_float64("int", int) + False + >>> is_implicit_conversion_to_float64(int, np.int64) + False + >>> is_implicit_conversion_to_float64(np.uint64, np.int64) + True + >>> is_implicit_conversion_to_float64(np.uint64, np.float64) + False + >>> is_implicit_conversion_to_float64(np.uint64, np.uint64) + False + >>> is_implicit_conversion_to_float64(np.uint32, np.uint32) + False + >>> is_implicit_conversion_to_float64(np.uint32, np.int32) + False + >>> is_implicit_conversion_to_float64(np.int32, np.int32) + False + >>> is_implicit_conversion_to_float64(object, "category") + False + >>> is_implicit_conversion_to_float64(np.int64, pd.UInt64Dtype()) + True + >>> from pandas.core.dtypes.dtypes import CategoricalDtype + >>> is_implicit_conversion_to_float64(CategoricalDtype(), "category") + False + """ + try: + src = _get_dtype(source) + tar = _get_dtype(target) + # check only valid dtypes related to implicit conversion to float64 + # other data types derived from 64-bit integers such as U/Int64Dtype + # should also work + if ( + src.kind in "iu" + and src.itemsize == 8 # type: ignore[union-attr] + and tar.kind in "iu" + and tar.itemsize == 8 # type: ignore[union-attr] + ): + return src != tar + else: + return False + except (TypeError, AttributeError, ImportError): + # invalid comparison + return False + + def is_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or 
dtype is of an integer dtype. @@ -1934,6 +2008,7 @@ def is_all_strings(value: ArrayLike) -> bool: "is_extension_array_dtype", "is_file_like", "is_float_dtype", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer_dtype", "is_interval_dtype", diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 871e977cbe2f8..24019b8e036a1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -295,6 +295,7 @@ class TestApi(Base): "is_float", "is_float_dtype", "is_hashable", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index bf39370c49d76..659b81a417cb6 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -20,6 +20,7 @@ class TestTypes(Base): "is_dtype_equal", "is_float", "is_float_dtype", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7fb421e27bb40..ec87441e3941a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1197,6 +1197,13 @@ def test_isin_unsigned_dtype(self): expected = Series(False) tm.assert_series_equal(result, expected) + def test_isin_unsigned_dtype_other_side(self): + # GH#46485 + ser = Series([1378774140726870442], dtype=np.int64) + result = ser.isin([np.uint64(1378774140726870528)]) + expected = Series(False) + tm.assert_series_equal(result, expected) + class TestValueCounts: def test_value_counts(self):