Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ Other Deprecations
- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)
- Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`)
- Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`)
- Deprecated the ``.str`` accessor for ``object`` dtype :class:`Series`; explicitly cast to ``"str"`` dtype before using the accessor instead (:issue:`29710`)
- Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`)
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ def _dir_additions(self) -> set[str]:
"""
Add additional __dir__ for this object.
"""
return {accessor for accessor in self._accessors if hasattr(self, accessor)}
with warnings.catch_warnings():
# Don't issue warning about .str accessor on object dtype
warnings.filterwarnings("ignore")
return {accessor for accessor in self._accessors if hasattr(self, accessor)}

def __dir__(self) -> list[str]:
"""
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from pandas.core.dtypes.missing import isna

from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.string_ import StringDtype
from pandas.core.base import NoNewAttributesMixin
from pandas.core.construction import extract_array

Expand Down Expand Up @@ -203,8 +204,6 @@ class StringMethods(NoNewAttributesMixin):
# * extractall

def __init__(self, data) -> None:
from pandas.core.arrays.string_ import StringDtype

self._inferred_dtype = self._validate(data)
self._is_categorical = isinstance(data.dtype, CategoricalDtype)
self._is_string = isinstance(data.dtype, StringDtype)
Expand Down Expand Up @@ -255,6 +254,14 @@ def _validate(data):
data = extract_array(data)

values = getattr(data, "categories", data) # categorical / normal
if data.dtype == object and get_option("future.infer_string"):
warnings.warn(
# GH#29710
".str accessor on object dtype is deprecated. Explicitly cast "
"to 'str' dtype instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

inferred_dtype = lib.infer_dtype(values, skipna=True)

Expand Down Expand Up @@ -3875,7 +3882,6 @@ def _result_dtype(arr):
# workaround #27953
# ideally we just pass `dtype=arr.dtype` unconditionally, but this fails
# when the list of values is empty.
from pandas.core.arrays.string_ import StringDtype

if isinstance(arr.dtype, (ArrowDtype, StringDtype)):
return arr.dtype
Expand Down
23 changes: 16 additions & 7 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5278,11 +5278,16 @@ def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.nd
"""
# encode if needed
if len(data):
data = (
Series(data.ravel(), copy=False, dtype="object")
.str.encode(encoding, errors)
._values.reshape(data.shape)
# We can _almost_ do ser.astype("str").str.encode(encoding, errors)
# But the conversion to "str" can fail in e.g. test_to_hdf_errors
ser = Series(data.ravel(), copy=False, dtype="object")
arr = np.asarray(ser)
func = lambda x: x.encode(encoding, errors=errors)
mask = isna(arr)
result = lib.map_infer_mask(
arr, func, mask.view(np.uint8), convert=not np.all(mask)
)
data = result.reshape(data.shape)

# create the sized dtype
ensured = ensure_object(data.ravel())
Expand Down Expand Up @@ -5319,9 +5324,13 @@ def _unconvert_string_array(
dtype = f"U{itemsize}"

if isinstance(data[0], bytes):
ser = Series(data, copy=False).str.decode(
encoding, errors=errors, dtype="object"
)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", ".str accessor on object dtype is deprecated"
)
ser = Series(data, copy=False).str.decode(
encoding, errors=errors, dtype="object"
)
data = ser.to_numpy()
data.flags.writeable = True
else:
Expand Down
7 changes: 6 additions & 1 deletion pandas/io/sas/sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from datetime import datetime
import sys
from typing import TYPE_CHECKING
import warnings

import numpy as np

Expand Down Expand Up @@ -717,7 +718,11 @@ def _chunk_to_dataframe(self) -> DataFrame:
elif self._column_types[j] == b"s":
rslt[name] = pd.Series(self._string_chunk[js, :], index=ix, copy=False)
if self.convert_text and (self.encoding is not None):
rslt[name] = self._decode_string(rslt[name].str)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", ".str accessor on object dtype is deprecated"
)
rslt[name] = self._decode_string(rslt[name].str)
if infer_string:
rslt[name] = rslt[name].astype("str")

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2744,7 +2744,7 @@ def _encode_strings(self) -> None:
types cannot be exported and must first be converted to one of the
supported types."""
)
encoded = self.data[col].str.encode(self._encoding)
encoded = self.data[col].astype("str").str.encode(self._encoding)
# If larger than _max_string_length do nothing
if (
max_len_string_array(ensure_object(self.data[col]._values))
Expand Down
108 changes: 56 additions & 52 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Generic,
TypeVar,
)
import warnings

import numpy as np
import pytest
Expand Down Expand Up @@ -134,58 +135,61 @@ def shape(self):

# collect all objects to be tested for list-like-ness; use tuples of objects,
# whether they are list-like or not (special casing for sets), and their ID
ll_params = [
([1], True, "list"),
([], True, "list-empty"),
((1,), True, "tuple"),
((), True, "tuple-empty"),
({"a": 1}, True, "dict"),
({}, True, "dict-empty"),
({"a", 1}, "set", "set"),
(set(), "set", "set-empty"),
(frozenset({"a", 1}), "set", "frozenset"),
(frozenset(), "set", "frozenset-empty"),
(iter([1, 2]), True, "iterator"),
(iter([]), True, "iterator-empty"),
((x for x in [1, 2]), True, "generator"),
((_ for _ in []), True, "generator-empty"),
(Series([1]), True, "Series"),
(Series([], dtype=object), True, "Series-empty"),
# Series.str will still raise a TypeError if iterated
(Series(["a"]).str, True, "StringMethods"),
(Series([], dtype="O").str, True, "StringMethods-empty"),
(Index([1]), True, "Index"),
(Index([]), True, "Index-empty"),
(DataFrame([[1]]), True, "DataFrame"),
(DataFrame(), True, "DataFrame-empty"),
(np.ndarray((2,) * 1), True, "ndarray-1d"),
(np.array([]), True, "ndarray-1d-empty"),
(np.ndarray((2,) * 2), True, "ndarray-2d"),
(np.array([[]]), True, "ndarray-2d-empty"),
(np.ndarray((2,) * 3), True, "ndarray-3d"),
(np.array([[[]]]), True, "ndarray-3d-empty"),
(np.ndarray((2,) * 4), True, "ndarray-4d"),
(np.array([[[[]]]]), True, "ndarray-4d-empty"),
(np.array(2), False, "ndarray-0d"),
(MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
(MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
(MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
(MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
(MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
(MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
(1, False, "int"),
(b"123", False, "bytes"),
(b"", False, "bytes-empty"),
("123", False, "string"),
("", False, "string-empty"),
(str, False, "string-type"),
(object(), False, "object"),
(np.nan, False, "NaN"),
(None, False, "None"),
]
with warnings.catch_warnings():
# suppress warning on "StringMethods-empty" with object dtype
warnings.filterwarnings("ignore", ".str accessor on object dtype is deprecated")
ll_params = [
([1], True, "list"),
([], True, "list-empty"),
((1,), True, "tuple"),
((), True, "tuple-empty"),
({"a": 1}, True, "dict"),
({}, True, "dict-empty"),
({"a", 1}, "set", "set"),
(set(), "set", "set-empty"),
(frozenset({"a", 1}), "set", "frozenset"),
(frozenset(), "set", "frozenset-empty"),
(iter([1, 2]), True, "iterator"),
(iter([]), True, "iterator-empty"),
((x for x in [1, 2]), True, "generator"),
((_ for _ in []), True, "generator-empty"),
(Series([1]), True, "Series"),
(Series([], dtype=object), True, "Series-empty"),
# Series.str will still raise a TypeError if iterated
(Series(["a"]).str, True, "StringMethods"),
(Series([], dtype="O").str, True, "StringMethods-empty"),
(Index([1]), True, "Index"),
(Index([]), True, "Index-empty"),
(DataFrame([[1]]), True, "DataFrame"),
(DataFrame(), True, "DataFrame-empty"),
(np.ndarray((2,) * 1), True, "ndarray-1d"),
(np.array([]), True, "ndarray-1d-empty"),
(np.ndarray((2,) * 2), True, "ndarray-2d"),
(np.array([[]]), True, "ndarray-2d-empty"),
(np.ndarray((2,) * 3), True, "ndarray-3d"),
(np.array([[[]]]), True, "ndarray-3d-empty"),
(np.ndarray((2,) * 4), True, "ndarray-4d"),
(np.array([[[[]]]]), True, "ndarray-4d-empty"),
(np.array(2), False, "ndarray-0d"),
(MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
(MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
(MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
(MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
(MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
(MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
(1, False, "int"),
(b"123", False, "bytes"),
(b"", False, "bytes-empty"),
("123", False, "string"),
("", False, "string-empty"),
(str, False, "string-type"),
(object(), False, "object"),
(np.nan, False, "NaN"),
(None, False, "None"),
]
objs, expected, ids = zip(*ll_params)


Expand Down
20 changes: 16 additions & 4 deletions pandas/tests/groupby/methods/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,12 @@ def test_against_frame_and_seriesgroupby(
expected.name = name
if as_index:
index_frame = expected.index.to_frame(index=False)
index_frame["gender"] = index_frame["both"].str.split("-").str.get(0)
index_frame["education"] = index_frame["both"].str.split("-").str.get(1)
index_frame["gender"] = (
index_frame["both"].astype(str).str.split("-").map(lambda x: x[0])
)
index_frame["education"] = (
index_frame["both"].astype(str).str.split("-").map(lambda x: x[1])
)
del index_frame["both"]
index_frame2 = index_frame.rename({0: None}, axis=1)
expected.index = MultiIndex.from_frame(index_frame2)
Expand All @@ -360,8 +364,16 @@ def test_against_frame_and_seriesgroupby(
expected.index.names = [None] + expected.index.names[1:]
tm.assert_series_equal(result, expected)
else:
expected.insert(1, "gender", expected["both"].str.split("-").str.get(0))
expected.insert(2, "education", expected["both"].str.split("-").str.get(1))
expected.insert(
1,
"gender",
expected["both"].astype(str).str.split("-").map(lambda x: x[0]),
)
expected.insert(
2,
"education",
expected["both"].astype(str).str.split("-").map(lambda x: x[1]),
)
if using_infer_string:
expected = expected.astype({"gender": "str", "education": "str"})
del expected["both"]
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ def test_iterator_read_too_much(self, dirpath):
tm.assert_frame_equal(d1, d2)


@pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype is deprecated:FutureWarning"
)
def test_encoding_options(datapath):
fname = datapath("io", "sas", "data", "test1.sas7bdat")
df1 = pd.read_sas(fname)
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def test_attrs(self):
result = s + 1
assert result.attrs == {"version": 1}

@pytest.mark.filterwarnings("ignore:.str accessor:FutureWarning")
def test_inspect_getmembers(self):
# GH38782
ser = Series(dtype=object)
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/strings/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
)
from pandas.core.strings.accessor import StringMethods

pytestmark = pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype:FutureWarning"
)

# subset of the full set from pandas/conftest.py
_any_allowed_skipna_inferred_dtype = [
("string", ["a", np.nan, "c"]),
Expand Down Expand Up @@ -214,3 +218,11 @@ def test_api_for_categorical(any_string_method, any_string_dtype):
else:
# str.cat(others=None) returns string, for example
assert result == expected


def test_object_str_deprecated():
    """Accessing ``.str`` on an object-dtype Series emits a FutureWarning."""
    # GH#29710
    expected_msg = ".str accessor on object dtype"
    obj_ser = Series(["a", "b", "c"], dtype=object)
    with tm.assert_produces_warning(FutureWarning, match=expected_msg):
        # Attribute access alone is what is under test; no string method
        # needs to be called for the warning to be expected.
        obj_ser.str
4 changes: 4 additions & 0 deletions pandas/tests/strings/test_case_justify.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
_testing as tm,
)

pytestmark = pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype:FutureWarning"
)


def test_title(any_string_dtype):
s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/strings/test_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
option_context,
)

pytestmark = pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype:FutureWarning"
)


@pytest.fixture
def index_or_series2(index_or_series):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/strings/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
_testing as tm,
)

pytestmark = pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype:FutureWarning"
)


def test_extract_expand_kwarg_wrong_type_raises(any_string_dtype):
# TODO: should this raise TypeError
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
is_object_or_nan_string_dtype,
)

pytestmark = pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype:FutureWarning"
)

# --------------------------------------------------------------------------------------
# str.contains
# --------------------------------------------------------------------------------------
Expand Down Expand Up @@ -1095,6 +1099,9 @@ def test_translate_mixed_object():
# --------------------------------------------------------------------------------------


@pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype is deprecated:FutureWarning"
)
def test_flags_kwarg(any_string_dtype):
data = {
"Dave": "[email protected]",
Expand All @@ -1121,7 +1128,8 @@ def test_flags_kwarg(any_string_dtype):
result = data.str.count(pat, flags=re.IGNORECASE)
assert result.iloc[0] == 1

data_str = data.str
msg = "has match groups"
with tm.assert_produces_warning(UserWarning, match=msg):
result = data.str.contains(pat, flags=re.IGNORECASE)
result = data_str.contains(pat, flags=re.IGNORECASE)
assert result.iloc[0]
4 changes: 4 additions & 0 deletions pandas/tests/strings/test_get_dummies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
_testing as tm,
)

pytestmark = pytest.mark.filterwarnings(
"ignore:.str accessor on object dtype:FutureWarning"
)


def test_get_dummies(any_string_dtype):
s = Series(["a|b", "a|c", np.nan], dtype=any_string_dtype)
Expand Down
Loading
Loading