Skip to content

Commit 67057fe

Browse files
committed
BUG: Raise on coercion of ambiguous datetime strings to datetime64
1 parent cfe54bd commit 67057fe

File tree

3 files changed

+59
-0
lines changed

3 files changed

+59
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,7 @@ Other
907907
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
908908
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
909909
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
910+
- Bug in input validation when coercing object-dtype arrays containing ambiguous datetime strings to ``datetime64`` that could result in silently inconsistent parsing (:issue:`61353`)
910911

911912
.. ***DO NOT USE THIS SECTION***
912913

pandas/core/internals/blocks.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2177,6 +2177,40 @@ def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
21772177
if isinstance(values, np.ndarray):
21782178
values = ensure_wrapped_if_datetimelike(values)
21792179

2180+
_date_like_re = re.compile(r"\d{1,4}[/\-]\d{1,2}[/\-]\d{1,4}")
2181+
if (
2182+
values.dtype == object
2183+
and values.ndim == 1
2184+
and len(values) > 0
2185+
and all(isinstance(x, str) and _date_like_re.match(x) for x in values)
2186+
):
2187+
print("[DEBUG] matched ambiguous datetime regex:", values)
2188+
from pandas.core.tools.datetimes import (
2189+
_guess_datetime_format_for_array,
2190+
to_datetime,
2191+
)
2192+
2193+
with warnings.catch_warnings():
2194+
warnings.simplefilter("ignore", UserWarning)
2195+
fmt = _guess_datetime_format_for_array(values)
2196+
2197+
if fmt is None:
2198+
raise ValueError(
2199+
"Ambiguous datetime string format detected. "
2200+
"Specify a format via `pd.to_datetime(..., format=...)` "
2201+
"or use `dayfirst=True`."
2202+
)
2203+
2204+
try:
2205+
# Validate consistent parsing
2206+
to_datetime(values, format=fmt, dayfirst=False)
2207+
except ValueError:
2208+
raise ValueError(
2209+
"Inconsistent or ambiguous datetime strings detected. "
2210+
"Specify `format=...` "
2211+
"or use `dayfirst=True` to ensure correct parsing."
2212+
) from None
2213+
21802214
if issubclass(values.dtype.type, str):
21812215
values = np.array(values, dtype=object)
21822216

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,18 @@ def test_setitem_index_object_dtype_not_inferring(self):
816816
)
817817
tm.assert_frame_equal(df, expected)
818818

819+
def test_setitem_with_ambiguous_datetime_strings_raises(self):
    # Assigning object-dtype date-like strings into a datetime64 column is
    # expected to raise a ValueError matching either message below.
    msg = (
        "(?i)ambiguous datetime string format|"
        "inconsistent or ambiguous datetime strings"
    )
    frame = DataFrame({"a": date_range("2020-01-01", periods=2)})
    with pytest.raises(ValueError, match=msg):
        strings = Series(["12/01/2020", "13/01/2020"], dtype=object)
        frame.loc[:, "a"] = strings
830+
819831

820832
class TestSetitemTZAwareValues:
821833
@pytest.fixture
@@ -1399,3 +1411,15 @@ def test_setitem_partial_row_multiple_columns():
13991411
}
14001412
)
14011413
tm.assert_frame_equal(df, expected)
1414+
1415+
1416+
def test_constructor_with_ambiguous_datetime_strings_raises():
    # A ValueError matching either message below is expected while building
    # the object-dtype frame and casting its column to datetime64[ns].
    msg = (
        "(?i)ambiguous datetime string format|"
        "inconsistent or ambiguous datetime strings"
    )
    with pytest.raises(ValueError, match=msg):
        strings = Series(["12/01/2020", "13/01/2020"], dtype="object")
        frame = DataFrame({"a": strings})
        frame.astype({"a": "datetime64[ns]"})

0 commit comments

Comments
 (0)