pandas-dev · jbrockmendel · Aug 20, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -621,6 +621,7 @@ Other Deprecations
 - Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)
 - Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`)
 - Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`)
+- Deprecated the ``.str`` accessor for ``object`` dtype :class:`Series` and :class:`Index`; explicitly cast to ``"str"`` dtype before using the accessor instead (:issue:`29710`)
 - Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`)
 - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
 

diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
@@ -41,7 +41,10 @@ def _dir_additions(self) -> set[str]:
         """
         Add additional __dir__ for this object.
         """
-        return {accessor for accessor in self._accessors if hasattr(self, accessor)}
+        with warnings.catch_warnings():
+            # Only hide the .str-on-object-dtype deprecation; let others surface
+            warnings.filterwarnings("ignore", ".str accessor", category=FutureWarning)
+            return {accessor for accessor in self._accessors if hasattr(self, accessor)}
 
     def __dir__(self) -> list[str]:
         """

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -49,6 +49,7 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays.string_ import StringDtype
 from pandas.core.base import NoNewAttributesMixin
 from pandas.core.construction import extract_array
 
@@ -203,8 +204,6 @@ class StringMethods(NoNewAttributesMixin):
     # * extractall
 
     def __init__(self, data) -> None:
-        from pandas.core.arrays.string_ import StringDtype
-
         self._inferred_dtype = self._validate(data)
         self._is_categorical = isinstance(data.dtype, CategoricalDtype)
         self._is_string = isinstance(data.dtype, StringDtype)
@@ -255,6 +254,14 @@ def _validate(data):
         data = extract_array(data)
 
         values = getattr(data, "categories", data)  # categorical / normal
+        if data.dtype == object and get_option("future.infer_string"):
+            warnings.warn(
+                # GH#29710
+                ".str accessor on object dtype is deprecated. Explicitly cast "
+                "to 'str' dtype instead.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
 
         inferred_dtype = lib.infer_dtype(values, skipna=True)
 
@@ -3875,7 +3882,6 @@ def _result_dtype(arr):
     # workaround #27953
     # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails
     # when the list of values is empty.
-    from pandas.core.arrays.string_ import StringDtype
 
     if isinstance(arr.dtype, (ArrowDtype, StringDtype)):
         return arr.dtype

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -5278,11 +5278,16 @@ def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.nd
     """
     # encode if needed
     if len(data):
-        data = (
-            Series(data.ravel(), copy=False, dtype="object")
-            .str.encode(encoding, errors)
-            ._values.reshape(data.shape)
+        # We can _almost_ do ser.astype("str").str.encode(encoding, errors)
+        #  But the conversion to "str" can fail in e.g. test_to_hdf_errors
+        ser = Series(data.ravel(), copy=False, dtype="object")
+        arr = np.asarray(ser)
+        func = lambda x: x.encode(encoding, errors=errors)
+        mask = isna(arr)
+        result = lib.map_infer_mask(
+            arr, func, mask.view(np.uint8), convert=not np.all(mask)
         )
+        data = result.reshape(data.shape)
 
     # create the sized dtype
     ensured = ensure_object(data.ravel())
@@ -5319,9 +5324,13 @@ def _unconvert_string_array(
         dtype = f"U{itemsize}"
 
         if isinstance(data[0], bytes):
-            ser = Series(data, copy=False).str.decode(
-                encoding, errors=errors, dtype="object"
-            )
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore", ".str accessor on object dtype", category=FutureWarning
+                )
+                ser = Series(data, copy=False).str.decode(
+                    encoding, errors=errors, dtype="object"
+                )
             data = ser.to_numpy()
             data.flags.writeable = True
         else:

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
@@ -19,6 +19,7 @@
 from datetime import datetime
 import sys
 from typing import TYPE_CHECKING
+import warnings
 
 import numpy as np
 
@@ -717,7 +718,11 @@ def _chunk_to_dataframe(self) -> DataFrame:
             elif self._column_types[j] == b"s":
                 rslt[name] = pd.Series(self._string_chunk[js, :], index=ix, copy=False)
                 if self.convert_text and (self.encoding is not None):
-                    rslt[name] = self._decode_string(rslt[name].str)
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings(
+                            "ignore", ".str accessor on object dtype", FutureWarning
+                        )
+                        rslt[name] = self._decode_string(rslt[name].str)
                     if infer_string:
                         rslt[name] = rslt[name].astype("str")
 

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -2744,7 +2744,7 @@ def _encode_strings(self) -> None:
 types cannot be exported and must first be converted to one of the
 supported types."""
                     )
-                encoded = self.data[col].str.encode(self._encoding)
+                encoded = self.data[col].astype("str").str.encode(self._encoding)
                 # If larger than _max_string_length do nothing
                 if (
                     max_len_string_array(ensure_object(self.data[col]._values))

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -25,6 +25,7 @@
     Generic,
     TypeVar,
 )
+import warnings
 
 import numpy as np
 import pytest
@@ -134,58 +135,61 @@ def shape(self):
 
 # collect all objects to be tested for list-like-ness; use tuples of objects,
 # whether they are list-like or not (special casing for sets), and their ID
-ll_params = [
-    ([1], True, "list"),
-    ([], True, "list-empty"),
-    ((1,), True, "tuple"),
-    ((), True, "tuple-empty"),
-    ({"a": 1}, True, "dict"),
-    ({}, True, "dict-empty"),
-    ({"a", 1}, "set", "set"),
-    (set(), "set", "set-empty"),
-    (frozenset({"a", 1}), "set", "frozenset"),
-    (frozenset(), "set", "frozenset-empty"),
-    (iter([1, 2]), True, "iterator"),
-    (iter([]), True, "iterator-empty"),
-    ((x for x in [1, 2]), True, "generator"),
-    ((_ for _ in []), True, "generator-empty"),
-    (Series([1]), True, "Series"),
-    (Series([], dtype=object), True, "Series-empty"),
-    # Series.str will still raise a TypeError if iterated
-    (Series(["a"]).str, True, "StringMethods"),
-    (Series([], dtype="O").str, True, "StringMethods-empty"),
-    (Index([1]), True, "Index"),
-    (Index([]), True, "Index-empty"),
-    (DataFrame([[1]]), True, "DataFrame"),
-    (DataFrame(), True, "DataFrame-empty"),
-    (np.ndarray((2,) * 1), True, "ndarray-1d"),
-    (np.array([]), True, "ndarray-1d-empty"),
-    (np.ndarray((2,) * 2), True, "ndarray-2d"),
-    (np.array([[]]), True, "ndarray-2d-empty"),
-    (np.ndarray((2,) * 3), True, "ndarray-3d"),
-    (np.array([[[]]]), True, "ndarray-3d-empty"),
-    (np.ndarray((2,) * 4), True, "ndarray-4d"),
-    (np.array([[[[]]]]), True, "ndarray-4d-empty"),
-    (np.array(2), False, "ndarray-0d"),
-    (MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
-    (MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
-    (MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
-    (MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
-    (MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
-    (MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
-    (MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
-    (MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
-    (MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
-    (1, False, "int"),
-    (b"123", False, "bytes"),
-    (b"", False, "bytes-empty"),
-    ("123", False, "string"),
-    ("", False, "string-empty"),
-    (str, False, "string-type"),
-    (object(), False, "object"),
-    (np.nan, False, "NaN"),
-    (None, False, "None"),
-]
+with warnings.catch_warnings():
+    # suppress warning on "StringMethods-empty" with object dtype
+    warnings.filterwarnings("ignore", ".str accessor on object dtype is deprecated")
+    ll_params = [
+        ([1], True, "list"),
+        ([], True, "list-empty"),
+        ((1,), True, "tuple"),
+        ((), True, "tuple-empty"),
+        ({"a": 1}, True, "dict"),
+        ({}, True, "dict-empty"),
+        ({"a", 1}, "set", "set"),
+        (set(), "set", "set-empty"),
+        (frozenset({"a", 1}), "set", "frozenset"),
+        (frozenset(), "set", "frozenset-empty"),
+        (iter([1, 2]), True, "iterator"),
+        (iter([]), True, "iterator-empty"),
+        ((x for x in [1, 2]), True, "generator"),
+        ((_ for _ in []), True, "generator-empty"),
+        (Series([1]), True, "Series"),
+        (Series([], dtype=object), True, "Series-empty"),
+        # Series.str will still raise a TypeError if iterated
+        (Series(["a"]).str, True, "StringMethods"),
+        (Series([], dtype="O").str, True, "StringMethods-empty"),
+        (Index([1]), True, "Index"),
+        (Index([]), True, "Index-empty"),
+        (DataFrame([[1]]), True, "DataFrame"),
+        (DataFrame(), True, "DataFrame-empty"),
+        (np.ndarray((2,) * 1), True, "ndarray-1d"),
+        (np.array([]), True, "ndarray-1d-empty"),
+        (np.ndarray((2,) * 2), True, "ndarray-2d"),
+        (np.array([[]]), True, "ndarray-2d-empty"),
+        (np.ndarray((2,) * 3), True, "ndarray-3d"),
+        (np.array([[[]]]), True, "ndarray-3d-empty"),
+        (np.ndarray((2,) * 4), True, "ndarray-4d"),
+        (np.array([[[[]]]]), True, "ndarray-4d-empty"),
+        (np.array(2), False, "ndarray-0d"),
+        (MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
+        (MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
+        (MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
+        (MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
+        (MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
+        (MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
+        (MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
+        (MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
+        (MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
+        (1, False, "int"),
+        (b"123", False, "bytes"),
+        (b"", False, "bytes-empty"),
+        ("123", False, "string"),
+        ("", False, "string-empty"),
+        (str, False, "string-type"),
+        (object(), False, "object"),
+        (np.nan, False, "NaN"),
+        (None, False, "None"),
+    ]
 objs, expected, ids = zip(*ll_params)
 
 

diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
@@ -347,8 +347,12 @@ def test_against_frame_and_seriesgroupby(
         expected.name = name
         if as_index:
             index_frame = expected.index.to_frame(index=False)
-            index_frame["gender"] = index_frame["both"].str.split("-").str.get(0)
-            index_frame["education"] = index_frame["both"].str.split("-").str.get(1)
+            index_frame["gender"] = (
+                index_frame["both"].astype(str).str.split("-").map(lambda x: x[0])
+            )
+            index_frame["education"] = (
+                index_frame["both"].astype(str).str.split("-").map(lambda x: x[1])
+            )
             del index_frame["both"]
             index_frame2 = index_frame.rename({0: None}, axis=1)
             expected.index = MultiIndex.from_frame(index_frame2)
@@ -360,8 +364,16 @@ def test_against_frame_and_seriesgroupby(
                 expected.index.names = [None] + expected.index.names[1:]
             tm.assert_series_equal(result, expected)
         else:
-            expected.insert(1, "gender", expected["both"].str.split("-").str.get(0))
-            expected.insert(2, "education", expected["both"].str.split("-").str.get(1))
+            expected.insert(
+                1,
+                "gender",
+                expected["both"].astype(str).str.split("-").map(lambda x: x[0]),
+            )
+            expected.insert(
+                2,
+                "education",
+                expected["both"].astype(str).str.split("-").map(lambda x: x[1]),
+            )
             if using_infer_string:
                 expected = expected.astype({"gender": "str", "education": "str"})
             del expected["both"]

diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
@@ -109,6 +109,9 @@ def test_iterator_read_too_much(self, dirpath):
         tm.assert_frame_equal(d1, d2)
 
 
+@pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype is deprecated:FutureWarning"
+)
 def test_encoding_options(datapath):
     fname = datapath("io", "sas", "data", "test1.sas7bdat")
     df1 = pd.read_sas(fname)

diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -160,6 +160,7 @@ def test_attrs(self):
         result = s + 1
         assert result.attrs == {"version": 1}
 
+    @pytest.mark.filterwarnings("ignore:.str accessor:FutureWarning")
     def test_inspect_getmembers(self):
         # GH38782
         ser = Series(dtype=object)

diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py
@@ -14,6 +14,10 @@
 )
 from pandas.core.strings.accessor import StringMethods
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype:FutureWarning"
+)
+
 # subset of the full set from pandas/conftest.py
 _any_allowed_skipna_inferred_dtype = [
     ("string", ["a", np.nan, "c"]),
@@ -214,3 +218,11 @@ def test_api_for_categorical(any_string_method, any_string_dtype):
     else:
         # str.cat(others=None) returns string, for example
         assert result == expected
+
+
+def test_object_str_deprecated(using_infer_string):
+    # GH#29710: the deprecation is only issued when future.infer_string is set
+    ser = Series(["a", "b", "c"], dtype=object)
+    warn = FutureWarning if using_infer_string else None
+    with tm.assert_produces_warning(warn, match=".str accessor on object dtype"):
+        ser.str
diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py
@@ -9,6 +9,10 @@
     _testing as tm,
 )
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype:FutureWarning"
+)
+
 
 def test_title(any_string_dtype):
     s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)

diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py
@@ -15,6 +15,10 @@
     option_context,
 )
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype:FutureWarning"
+)
+
 
 @pytest.fixture
 def index_or_series2(index_or_series):

diff --git a/pandas/tests/strings/test_extract.py b/pandas/tests/strings/test_extract.py
@@ -14,6 +14,10 @@
     _testing as tm,
 )
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype:FutureWarning"
+)
+
 
 def test_extract_expand_kwarg_wrong_type_raises(any_string_dtype):
     # TODO: should this raise TypeError

diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
@@ -16,6 +16,10 @@
     is_object_or_nan_string_dtype,
 )
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype:FutureWarning"
+)
+
 # --------------------------------------------------------------------------------------
 # str.contains
 # --------------------------------------------------------------------------------------
@@ -1095,6 +1099,9 @@ def test_translate_mixed_object():
 # --------------------------------------------------------------------------------------
 
 
+@pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype is deprecated:FutureWarning"
+)
 def test_flags_kwarg(any_string_dtype):
     data = {
         "Dave": "[email protected]",
@@ -1121,7 +1128,8 @@ def test_flags_kwarg(any_string_dtype):
     result = data.str.count(pat, flags=re.IGNORECASE)
     assert result.iloc[0] == 1
 
+    data_str = data.str
     msg = "has match groups"
     with tm.assert_produces_warning(UserWarning, match=msg):
-        result = data.str.contains(pat, flags=re.IGNORECASE)
+        result = data_str.contains(pat, flags=re.IGNORECASE)
     assert result.iloc[0]
diff --git a/pandas/tests/strings/test_get_dummies.py b/pandas/tests/strings/test_get_dummies.py
@@ -11,6 +11,10 @@
     _testing as tm,
 )
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:.str accessor on object dtype:FutureWarning"
+)
+
 
 def test_get_dummies(any_string_dtype):
     s = Series(["a|b", "a|c", np.nan], dtype=any_string_dtype)