Skip to content

Commit 8ff6ffa

Browse files
committed
Add flexible deserialization for datetime fill values
1 parent 36a1bac commit 8ff6ffa

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed

src/zarr/core/metadata/v2.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,15 @@ def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
327327
"""
328328

329329
if fill_value is None or dtype.hasobject:
330+
# Pass through None or if dtype is object
330331
pass
332+
elif dtype.kind in "M":
333+
# Check for both string "NaT" and the int64 representation of NaT
334+
if fill_value == "NaT" or fill_value == np.iinfo(np.int64).min:
335+
fill_value = dtype.type("NaT")
336+
else:
337+
fill_value = np.array(fill_value, dtype=dtype)[()]
338+
# Non-NaT datetime64 values were already converted to a scalar of `dtype` by the else branch above
331339
elif dtype.fields is not None:
332340
# the dtype is structured (has multiple fields), so the fill_value might be a
333341
# compound value (e.g., a tuple or dict) that needs field-wise processing.

tests/test_metadata/test_v2.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,69 @@ async def test_getitem_consolidated(self, v2_consolidated_metadata):
277277
assert air.metadata.shape == (730,)
278278

279279

280+
@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"])
def test_parse_v2_fill_value_nat_integer(dtype_str: str) -> None:
    """An integer fill value equal to the int64 minimum must be parsed as NaT.

    The V2 metadata document carries ``np.iinfo(np.int64).min`` as its
    ``fill_value``; after ``ArrayV2Metadata.from_dict`` the resulting fill
    value has to satisfy ``np.isnat`` and share the requested dtype's kind.
    """
    dtype = np.dtype(dtype_str)
    # Smallest int64 (-9223372036854775808), used here as the integer form of NaT.
    int64_min = np.iinfo(np.int64).min

    parsed = ArrayV2Metadata.from_dict(
        {
            "zarr_format": 2,
            "shape": (10,),
            "chunks": (5,),
            "dtype": dtype.str,
            "compressor": None,
            "filters": None,
            "fill_value": int64_min,
            "order": "C",
        }
    )

    assert np.isnat(parsed.fill_value)
    # Only the kind is compared, not the full dtype (unit included).
    assert parsed.fill_value.dtype.kind == dtype.kind
298+
299+
300+
@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"])
def test_parse_v2_fill_value_nat_string(dtype_str: str) -> None:
    """A fill value given as the literal string ``"NaT"`` must be parsed as NaT.

    The parsed fill value has to satisfy ``np.isnat`` and share the kind of
    the dtype named by ``dtype_str``.
    """
    dtype = np.dtype(dtype_str)

    parsed = ArrayV2Metadata.from_dict(
        {
            "zarr_format": 2,
            "shape": (10,),
            "chunks": (5,),
            "dtype": dtype.str,
            "compressor": None,
            "filters": None,
            "fill_value": "NaT",
            "order": "C",
        }
    )

    assert np.isnat(parsed.fill_value)
    # Only the kind is compared, not the full dtype (unit included).
    assert parsed.fill_value.dtype.kind == dtype.kind
317+
318+
319+
@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"])
def test_parse_v2_fill_value_non_nat(dtype_str: str) -> None:
    """An ordinary integer fill value must come back as a scalar of the dtype.

    The expected scalar is obtained by letting NumPy cast the same integer to
    the requested dtype; the parsed fill value must equal it and carry exactly
    that dtype.
    """
    dtype = np.dtype(dtype_str)

    # Pick a different integer for the datetime ("M") case than for the other case.
    if dtype.kind == "M":
        raw_fill = 1234567890
    else:
        raw_fill = 12345

    # Indexing the 0-d array with () extracts the NumPy scalar.
    expected = np.array(raw_fill, dtype=dtype)[()]

    parsed = ArrayV2Metadata.from_dict(
        {
            "zarr_format": 2,
            "shape": (10,),
            "chunks": (5,),
            "dtype": dtype.str,
            "compressor": None,
            "filters": None,
            "fill_value": raw_fill,
            "order": "C",
        }
    )

    assert parsed.fill_value == expected
    assert parsed.fill_value.dtype == dtype
341+
342+
280343
def test_from_dict_extra_fields() -> None:
281344
data = {
282345
"_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"},

0 commit comments

Comments
 (0)