diff --git a/changes/2755.bugfix.rst b/changes/2755.bugfix.rst new file mode 100644 index 0000000000..2555369544 --- /dev/null +++ b/changes/2755.bugfix.rst @@ -0,0 +1,3 @@ +The array returned by ``zarr.empty`` and an empty ``zarr.core.buffer.cpu.NDBuffer`` will now be filled with the +specified fill value, or with zeros if no fill value is provided. +This fixes a bug where Zarr format 2 data with no fill value was written with unpredictable chunk sizes. \ No newline at end of file diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 8eba4fc152..91431ad76b 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -1065,7 +1065,8 @@ async def create( async def empty( shape: ChunkCoords, **kwargs: Any ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: - """Create an empty array. + """Create an empty array with the specified shape. The contents will be filled with the + array's fill value or zeros if no fill value is provided. Parameters ---------- @@ -1073,12 +1074,6 @@ async def empty( Shape of the empty array. **kwargs Keyword arguments passed to :func:`zarr.api.asynchronous.create`. - - Notes - ----- - The contents of an empty Zarr array are not defined. On attempting to - retrieve data from an empty Zarr array, any values may be returned, - and these are not guaranteed to be stable from one access to the next. """ return await create(shape=shape, fill_value=None, **kwargs) @@ -1087,7 +1082,8 @@ async def empty( async def empty_like( a: ArrayLike, **kwargs: Any ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: - """Create an empty array like `a`. + """Create an empty array like `a`. The contents will be filled with the + array's fill value or zeros if no fill value is provided. 
Parameters ---------- diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 305446ec97..efeb65265f 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -902,7 +902,8 @@ def create_array( # TODO: add type annotations for kwargs def empty(shape: ChunkCoords, **kwargs: Any) -> Array: - """Create an empty array. + """Create an empty array with the specified shape. The contents will be filled with the + array's fill value or zeros if no fill value is provided. Parameters ---------- @@ -915,12 +916,6 @@ def empty(shape: ChunkCoords, **kwargs: Any) -> Array: ------- Array The new array. - - Notes - ----- - The contents of an empty Zarr array are not defined. On attempting to - retrieve data from an empty Zarr array, any values may be returned, - and these are not guaranteed to be stable from one access to the next. """ return Array(sync(async_api.empty(shape, **kwargs))) @@ -928,7 +923,8 @@ def empty(shape: ChunkCoords, **kwargs: Any) -> Array: # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs def empty_like(a: ArrayLike, **kwargs: Any) -> Array: - """Create an empty array like another array. + """Create an empty array like another array. The contents will be filled with the + array's fill value or zeros if no fill value is provided. 
Parameters ---------- diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 5019075496..225adb6f5c 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -154,10 +154,10 @@ def create( order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: - ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order)) - if fill_value is not None: - ret.fill(fill_value) - return ret + if fill_value is None: + return cls(np.zeros(shape=tuple(shape), dtype=dtype, order=order)) + else: + return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order)) @classmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 4760923e0b..880ad8945d 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1498,7 +1498,8 @@ async def tree(self, expand: bool | None = None, level: int | None = None) -> An async def empty( self, *, name: str, shape: ChunkCoords, **kwargs: Any ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: - """Create an empty array in this Group. + """Create an empty array with the specified shape in this Group. The contents will + be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- @@ -1592,7 +1593,8 @@ async def full( async def empty_like( self, *, name: str, data: async_api.ArrayLike, **kwargs: Any ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: - """Create an empty sub-array like `data`. + """Create an empty sub-array like `data`. The contents will be filled with + the array's fill value or zeros if no fill value is provided. Parameters ---------- @@ -2442,7 +2444,8 @@ def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array: @_deprecate_positional_args def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: - """Create an empty array in this Group. 
+ """Create an empty array with the specified shape in this Group. The contents will be filled with + the array's fill value or zeros if no fill value is provided. Parameters ---------- @@ -2453,11 +2456,6 @@ def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: **kwargs Keyword arguments passed to :func:`zarr.api.asynchronous.create`. - Notes - ----- - The contents of an empty Zarr array are not defined. On attempting to - retrieve data from an empty Zarr array, any values may be returned, - and these are not guaranteed to be stable from one access to the next. """ return Array(self._sync(self._async_group.empty(name=name, shape=shape, **kwargs))) @@ -2531,7 +2529,8 @@ def full( @_deprecate_positional_args def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: - """Create an empty sub-array like `data`. + """Create an empty sub-array like `data`. The contents will be filled + with the array's fill value or zeros if no fill value is provided. 
Parameters ---------- diff --git a/tests/test_store/test_memory.py b/tests/test_store/test_memory.py index ba38889b52..f00d75a8f0 100644 --- a/tests/test_store/test_memory.py +++ b/tests/test_store/test_memory.py @@ -1,12 +1,19 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +import numpy as np import pytest +import zarr from zarr.core.buffer import Buffer, cpu, gpu from zarr.storage import GpuMemoryStore, MemoryStore from zarr.testing.store import StoreTests from zarr.testing.utils import gpu_test +if TYPE_CHECKING: + from zarr.core.common import ZarrFormat + class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]): store_cls = MemoryStore @@ -46,6 +53,25 @@ def test_store_supports_partial_writes(self, store: MemoryStore) -> None: def test_list_prefix(self, store: MemoryStore) -> None: assert True + @pytest.mark.parametrize("dtype", ["uint8", "float32", "int64"]) + @pytest.mark.parametrize("zarr_format", [2, 3]) + async def test_deterministic_size( + self, store: MemoryStore, dtype, zarr_format: ZarrFormat + ) -> None: + a = zarr.empty( + store=store, + shape=(3,), + chunks=(1000,), + dtype=dtype, + zarr_format=zarr_format, + overwrite=True, + ) + a[...] = 1 + a.resize((1000,)) + + np.testing.assert_array_equal(a[:3], 1) + np.testing.assert_array_equal(a[3:], 0) + @gpu_test class TestGpuMemoryStore(StoreTests[GpuMemoryStore, gpu.Buffer]):