diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index c58b4a4be6df1..81e7495f781b2 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -904,6 +904,19 @@ def select(
         # create the storer and axes
         where = _ensure_term(where, scope_level=1)
         s = self._create_storer(group)
+
+        # Raise an error if trying to query tz-aware index.
+        # NOTE(review): confirm that the storers created here actually expose
+        # `obj`; if none of them do, this guard never fires.
+        if where is not None:
+            # some storer types (e.g. Legacy format) may not have `obj`; skip check
+            index = getattr(getattr(s, "obj", None), "index", None)
+            if getattr(index, "tz", None) is not None:
+                raise ValueError(
+                    "Filtering with `where=` on timezone-aware indexes is not "
+                    "supported in HDFStore."
+                )
+
         s.infer_axes()
 
         # function to call on iteration
diff --git a/pandas/tests/io/pytables/test_timezone_bug.py b/pandas/tests/io/pytables/test_timezone_bug.py
new file mode 100644
index 0000000000000..512910870bc7d
--- /dev/null
+++ b/pandas/tests/io/pytables/test_timezone_bug.py
@@ -0,0 +1,26 @@
+import pytest
+
+import pandas as pd
+
+# HDFStore needs the optional PyTables dependency; skip the module without it.
+pytest.importorskip("tables")
+
+
+@pytest.mark.parametrize("tz", ["US/Eastern", "Europe/Berlin"])
+def test_hdf_where_query_on_tzindex_raises(tmp_path, tz):
+    """Check that a ``where=`` query against a tz-aware index raises."""
+    df = pd.DataFrame({"x": range(5)})
+    df["dt"] = pd.date_range("2020-01-01", periods=5, tz=tz)
+    df = df.set_index("dt")
+
+    file_path = tmp_path / "test.h5"
+    df.to_hdf(file_path, key="df", format="table")
+
+    # Build the literal from the index so the UTC offset matches ``tz``
+    # instead of hard-coding the US/Eastern offset for every parametrization.
+    target = df.index[2]
+
+    # NOTE(review): this message is not the one raised by the new guard in
+    # ``select``; confirm which code path is expected to produce it.
+    with pytest.raises(ValueError, match="invalid variable reference"):
+        pd.read_hdf(file_path, key="df", where=f'dt=="{target}"')