
(feat): support for zarr-python>=3 #1726

Status: Draft. Wants to merge 71 commits into base: main.

Commits (71, showing changes from all commits)
0840150
(wip): support for new zarr version
ilan-gold Oct 21, 2024
7876318
Merge branch 'main' into ig/zarr_v3
ilan-gold Nov 18, 2024
892888e
(chore): create setting for write version
ilan-gold Nov 18, 2024
4846e7d
(fix): pathing issue
ilan-gold Nov 18, 2024
4eeb1ec
(chore): use `open_group`
ilan-gold Nov 18, 2024
19b7f41
(fix): another `zarr.open`
ilan-gold Nov 18, 2024
aafcc7a
(fix): `zarr_write_version` -> `zarr_write_format`
ilan-gold Nov 18, 2024
215c761
(feat): batched reading for sparse
ilan-gold Dec 5, 2024
7cf74b2
(fix): object codec
ilan-gold Dec 5, 2024
fb4fabc
Merge branch 'main' into ig/zarr_v3
ilan-gold Dec 5, 2024
4594c52
(fix): revert compressed vectors
ilan-gold Dec 5, 2024
f465a95
(feat): scalar support
ilan-gold Dec 5, 2024
65e736a
(fix): `open_group` for v3
ilan-gold Dec 5, 2024
ae8bd4a
(fix): backed sparse copy method
ilan-gold Dec 5, 2024
5c2a8b3
(fix): add speed-up for zarr by batching indexing
ilan-gold Dec 8, 2024
434d15c
(fix): no `__len__` on new zarr arrays
ilan-gold Dec 8, 2024
688ff9d
(fix): some v3 fixes
ilan-gold Jan 9, 2025
14a226d
Merge branch 'main' into ig/zarr_v3
flying-sheep Jan 10, 2025
4defc32
Specify mode with kwarg
flying-sheep Jan 10, 2025
1687554
chore: typing fixes
flying-sheep Jan 10, 2025
a160642
Merge branch 'ig/zarr_v3' of github.com:scverse/anndata into ig/zarr_v3
ilan-gold Jan 13, 2025
ff52eb3
(fix): more `create_dataset` args
ilan-gold Jan 13, 2025
340539f
Merge branch 'main' into ig/zarr_v3
flying-sheep Jan 14, 2025
1316c12
format->version
flying-sheep Jan 14, 2025
2b19ba1
clear properly
flying-sheep Jan 14, 2025
a12862a
dynamic format/version
flying-sheep Jan 14, 2025
f85b027
Centralize version comparison
flying-sheep Jan 14, 2025
d31d2e5
Fix most create_dataset errors
flying-sheep Jan 14, 2025
f9830ae
unpin zarr everywhere
flying-sheep Jan 14, 2025
f1ca6f7
Almost fix docs
flying-sheep Jan 14, 2025
3dc87a3
(fix): compression test
ilan-gold Jan 14, 2025
3856c9f
(fix): tracking store
ilan-gold Jan 14, 2025
d953f93
(fix): `as_group` `mode` arg
ilan-gold Jan 15, 2025
91ba051
(fix): temporary fix for writing from zarr array
ilan-gold Jan 15, 2025
db7c025
(fix): more context issues
ilan-gold Jan 15, 2025
39a31a3
(fix): no `chunks` `bool` arg
ilan-gold Jan 15, 2025
eec2e63
(fix): return `item` for array
ilan-gold Jan 15, 2025
ed6d2a4
(chore): add issue
ilan-gold Jan 15, 2025
41af7ea
Bump scanpydoc version
flying-sheep Jan 16, 2025
418f063
Merge branch 'main' into ig/zarr_v3
flying-sheep Jan 16, 2025
eb5dc85
Merge branch 'main' into ig/zarr_v3
ilan-gold Jan 20, 2025
3ac48cb
(chore): add zarr v2 test
ilan-gold Jan 20, 2025
ac5b86d
(chore): add warning for zarr v2
ilan-gold Jan 20, 2025
5fbbaef
(fix): fix tracking tests for zarr v2
ilan-gold Jan 20, 2025
1d85eea
(chore): refactor a bit
ilan-gold Jan 20, 2025
80fef78
(fix): revert zarr v2 test
ilan-gold Jan 21, 2025
c394159
(fix): setting on group test
ilan-gold Jan 21, 2025
bc3f2bc
(fix): reopening issue
ilan-gold Jan 24, 2025
ae18de3
(fix): using zip files for backwards compat
ilan-gold Jan 24, 2025
a7d3bf7
Merge branch 'main' into ig/zarr_v3
ilan-gold Jan 24, 2025
08607e8
(fix): temporary fix for `empty`
ilan-gold Jan 24, 2025
9a6b932
(fix): io dispatched keys test
ilan-gold Jan 24, 2025
ae32f2b
(fix): sparse array access tracking
ilan-gold Jan 24, 2025
8cf58e1
(fix): more reopen zarr store
ilan-gold Jan 24, 2025
fedccca
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 24, 2025
ecfdcb8
(fix): lazy reading
ilan-gold Jan 27, 2025
4cc63eb
Merge branch 'ig/zarr_v3' of github.com:scverse/anndata into ig/zarr_v3
ilan-gold Jan 27, 2025
c10a99c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 27, 2025
5c9cb70
Merge branch 'main' into ig/zarr_v3
ilan-gold Feb 2, 2025
33fb472
(fix): ensure v2 group is made
ilan-gold Feb 3, 2025
b89d524
(fix): add `visititems`
ilan-gold Feb 3, 2025
ecc4662
(fix): point zarr at main
ilan-gold Feb 3, 2025
f1b45de
Merge branch 'ig/zarr_v3' of github.com:scverse/anndata into ig/zarr_v3
ilan-gold Feb 3, 2025
099a248
(fix): don't provide `VLenUTF8Codec` for default string
ilan-gold Feb 3, 2025
1762697
(fix): doctest
ilan-gold Feb 3, 2025
6aced7e
(fix): move off the zarr main branch
ilan-gold Feb 4, 2025
7c5d2cd
(fix): pin zarr in benchmarks
ilan-gold Feb 4, 2025
b09666a
(feat): zarr v2 handling in tests
ilan-gold Feb 4, 2025
015cb6f
(fix): warning on `zarr.open`
ilan-gold Feb 4, 2025
62b3654
(fix): `create_array` instead of `create_dataset`
ilan-gold Feb 4, 2025
122e311
Merge branch 'main' into ig/zarr_v3
ilan-gold Feb 4, 2025
1 change: 1 addition & 0 deletions .azure-pipelines.yml
@@ -28,6 +28,7 @@ jobs:
python.version: "3.10"
DEPENDENCIES_VERSION: "minimum"
TEST_TYPE: "coverage"

steps:
- task: UsePythonVersion@0
inputs:
4 changes: 3 additions & 1 deletion docs/conf.py
@@ -111,8 +111,8 @@ def setup(app: Sphinx):
python=("https://docs.python.org/3", None),
scipy=("https://docs.scipy.org/doc/scipy", None),
sklearn=("https://scikit-learn.org/stable", None),
zarr=("https://zarr.readthedocs.io/en/stable/", None),
xarray=("https://docs.xarray.dev/en/stable", None),
zarr=("https://zarr.readthedocs.io/en/v2.18.4/", None),
)
qualname_overrides = {
"h5py._hl.group.Group": "h5py.Group",
@@ -128,6 +128,8 @@ def setup(app: Sphinx):
"anndata._types.WriteCallback": "anndata.experimental.WriteCallback",
"anndata._types.Read": "anndata.experimental.Read",
"anndata._types.Write": "anndata.experimental.Write",
"zarr.core.array.Array": "zarr.Array",
"zarr.core.group.Group": "zarr.Group",
"anndata.compat.DaskArray": "dask.array.Array",
"anndata.compat.CupyArray": "cupy.ndarray",
"anndata.compat.CupySparseMatrix": "cupyx.scipy.sparse.spmatrix",
2 changes: 1 addition & 1 deletion docs/fileformat-prose.md
@@ -91,7 +91,7 @@ Using this information, we're able to dispatch onto readers for the different el
## Dense arrays

Dense numeric arrays have the most simple representation on disk,
as they have native equivalents in H5py {doc}`h5py:high/dataset` and Zarr {ref}`Arrays <zarr:tutorial_create>`.
as they have native equivalents in H5py {doc}`h5py:high/dataset` and Zarr {doc}`Arrays <zarr:user-guide/arrays>`.
We can see an example of this with dimensionality reductions stored in the `obsm` group:

`````{tab-set}
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -49,6 +49,7 @@ dependencies = [
# array-api-compat 1.5 has https://github.com/scverse/anndata/issues/1410
"array_api_compat>1.4,!=1.5",
"legacy-api-wrap",
"zarr",
]
dynamic = ["version"]

@@ -74,7 +75,6 @@ doc = [
"sphinxext.opengraph",
"nbsphinx",
"scanpydoc[theme,typehints] >=0.15.1",
"zarr<3",
"awkward>=2.3",
"IPython", # For syntax highlighting in notebooks
"myst_parser",
@@ -88,7 +88,6 @@ test = [
"loompy>=3.0.5",
"pytest>=8.2,<8.3.4",
"pytest-cov>=2.10",
"zarr<3",
"matplotlib",
"scikit-learn",
"openpyxl",
@@ -149,6 +148,7 @@ filterwarnings_when_strict = [
"default:(Observation|Variable) names are not unique. To make them unique:UserWarning",
"default::scipy.sparse.SparseEfficiencyWarning",
"default::dask.array.core.PerformanceWarning",
"default:anndata will no longer support zarr v2:FutureWarning"
]
python_files = "test_*.py"
testpaths = [
9 changes: 8 additions & 1 deletion src/anndata/_core/anndata.py
@@ -1944,7 +1944,7 @@
def write_zarr(
self,
store: MutableMapping | PathLike,
chunks: bool | int | tuple[int, ...] | None = None,
chunks: tuple[int, ...] | None = None,
):
"""\
Write a hierarchical Zarr array store.
@@ -1958,6 +1958,13 @@
"""
from ..io import write_zarr

# TODO: What is a bool for chunks supposed to do?
if isinstance(chunks, bool):
msg = (
"Passing `write_zarr(adata, chunks=True)` is no longer supported. "
"Please pass `write_zarr(adata)` instead."
)
raise ValueError(msg)

write_zarr(store, self, chunks=chunks)

def chunked_X(self, chunk_size: int | None = None):
Expand Down
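Several hunks in this PR gate behavior on a compat helper, `is_zarr_v2()`, imported from `anndata.compat`. The diff does not show that helper's body; a minimal stdlib-only sketch of such a major-version check might look like the following (taking the version string as a parameter, rather than reading `zarr.__version__` directly, is an illustrative simplification so the sketch needs no zarr install):

```python
def is_zarr_v2(zarr_version: str) -> bool:
    """Return True when the given zarr-python version is from the 2.x series.

    A real compat helper would inspect ``zarr.__version__`` itself; the
    explicit parameter here is an assumption made for testability.
    """
    major = zarr_version.partition(".")[0]
    # Strip any pre-release suffix such as "3b1" before converting.
    digits = "".join(ch for ch in major if ch.isdigit())
    return int(digits) < 3
```

For example, `is_zarr_v2("2.18.4")` is true while `is_zarr_v2("3.0.2")` is false, which is the split the hunks below branch on.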
31 changes: 23 additions & 8 deletions src/anndata/_core/sparse_dataset.py
@@ -30,7 +30,7 @@

from .. import abc
from .._settings import settings
from ..compat import H5Group, SpArray, ZarrArray, ZarrGroup, _read_attr
from ..compat import H5Group, SpArray, ZarrArray, ZarrGroup, _read_attr, is_zarr_v2
from .index import _fix_slice_bounds, _subset, unpack_index

if TYPE_CHECKING:
@@ -73,13 +73,22 @@ def copy(self) -> ss.csr_matrix | ss.csc_matrix:
if isinstance(self.data, ZarrArray):
import zarr

return sparse_dataset(
zarr.open(
if is_zarr_v2():
sparse_group = zarr.open(
store=self.data.store,
mode="r",
chunk_store=self.data.chunk_store, # chunk_store is needed, not clear why
)[Path(self.data.path).parent]
).to_memory()
else:
anndata_group = zarr.open_group(store=self.data.store, mode="r")
sparse_group = anndata_group[
str(
Path(str(self.data.store_path))
.relative_to(str(anndata_group.store_path))
.parent
)
]
return sparse_dataset(sparse_group).to_memory()
return super().copy()

def _set_many(self, i: Iterable[int], j: Iterable[int], x):
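The v3 branch of `copy()` above recovers the sparse group's key by taking the backing array's store path relative to the reopened root and stepping up one level (the `data` array lives inside the sparse group). That path arithmetic can be sketched with the stdlib alone (function name hypothetical):

```python
from pathlib import PurePosixPath

def sparse_group_key(array_path: str, root_path: str) -> str:
    """Key of the group holding a sparse matrix's components, given the
    path of its ``data`` array and the path of the opened root group."""
    return str(PurePosixPath(array_path).relative_to(root_path).parent)
```

For instance, with a `data` array at `my.zarr/layers/counts/data` under a root opened at `my.zarr`, this yields the key `layers/counts`, which can then be indexed out of the reopened group as in the hunk above.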
@@ -534,9 +543,9 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None
f"{self.format!r} and {sparse_matrix.format!r}"
)
raise ValueError(msg)
indptr_offset = len(self.group["indices"])
[indptr_offset] = self.group["indices"].shape
if self.group["indptr"].dtype == np.int32:
new_nnz = indptr_offset + len(sparse_matrix.indices)
new_nnz = indptr_offset + sparse_matrix.indices.shape[0]
if new_nnz >= np.iinfo(np.int32).max:
msg = (
"This array was written with a 32 bit intptr, but is now large "
@@ -567,7 +576,13 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None
data = self.group["data"]
orig_data_size = data.shape[0]
data.resize((orig_data_size + sparse_matrix.data.shape[0],))
data[orig_data_size:] = sparse_matrix.data
# see https://github.com/zarr-developers/zarr-python/discussions/2712 for why we need to read first
append_data = sparse_matrix.data
append_indices = sparse_matrix.indices
if isinstance(sparse_matrix.data, ZarrArray) and not is_zarr_v2():
append_data = append_data[...]
append_indices = append_indices[...]
data[orig_data_size:] = append_data

# indptr
indptr = self.group["indptr"]
@@ -581,7 +596,7 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None
indices = self.group["indices"]
orig_data_size = indices.shape[0]
indices.resize((orig_data_size + sparse_matrix.indices.shape[0],))
indices[orig_data_size:] = sparse_matrix.indices
indices[orig_data_size:] = append_indices

# Clear cached property
for attr in ["_indptr", "_indices", "_data"]:
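The `append` hunks above shift the incoming matrix's `indptr` by the number of nonzeros already on disk; the change to `[indptr_offset] = self.group["indices"].shape` reads that count from the array's shape because new zarr arrays no longer support `len()`. The underlying CSR bookkeeping can be sketched in pure Python (lists stand in for the on-disk arrays; names are hypothetical):

```python
def append_csr(a, b):
    """Row-wise concatenation of two CSR matrices given as
    (data, indices, indptr) triples, mirroring the on-disk append."""
    data_a, indices_a, indptr_a = a
    data_b, indices_b, indptr_b = b
    offset = len(data_a)  # nnz already stored; on disk this is indices.shape[0]
    # b's indptr entries are shifted by the existing nnz, and its
    # leading zero is dropped so the pointer array stays monotonic.
    indptr = indptr_a + [p + offset for p in indptr_b[1:]]
    return data_a + data_b, indices_a + indices_b, indptr
```

Appending a one-row matrix `([3], [0], [0, 1])` to a two-row matrix `([1, 2], [0, 1], [0, 1, 2])` produces `indptr == [0, 1, 2, 3]`, exactly the shift the hunk performs before resizing the on-disk arrays.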
13 changes: 11 additions & 2 deletions src/anndata/_io/h5ad.py
@@ -21,6 +21,7 @@
_clean_uns,
_decode_structured_array,
_from_fixed_length_strings,
is_zarr_v2,
)
from ..experimental import read_dispatched
from .specs import read_elem, write_elem
@@ -38,6 +39,7 @@
from typing import Any, Literal

from .._core.file_backing import AnnDataFileManager
from .._types import GroupStorageType

T = TypeVar("T")

@@ -113,7 +115,7 @@
@report_write_key_on_error
@write_spec(IOSpec("array", "0.2.0"))
def write_sparse_as_dense(
f: h5py.Group,
f: GroupStorageType,
key: str,
value: sparse.spmatrix | BaseCompressedSparseDataset,
*,
@@ -129,7 +131,14 @@
key = re.sub(r"(.*)(\w(?!.*/))", r"\1_\2", key.rstrip("/"))
else:
del f[key] # Wipe before write
dset = f.create_dataset(key, shape=value.shape, dtype=value.dtype, **dataset_kwargs)
if isinstance(f, h5py.Group) or is_zarr_v2():
dset = f.create_dataset(
key, shape=value.shape, dtype=value.dtype, **dataset_kwargs
)
else:
dset = f.create_array(
key, shape=value.shape, dtype=value.dtype, **dataset_kwargs
)
compressed_axis = int(isinstance(value, sparse.csc_matrix))
for idx in idx_chunks_along_axis(value.shape, compressed_axis, 1000):
dset[idx] = value[idx].toarray()
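The hunk above branches on `isinstance(f, h5py.Group) or is_zarr_v2()` because zarr-python 3 introduces `Group.create_array`, while h5py and zarr 2 spell the same operation `create_dataset`. An alternative to the version check is duck typing on the group object; the sketch below uses that variant (not the PR's actual approach) so it can be exercised with plain stub objects:

```python
def create_dense(group, key, *, shape, dtype, **dataset_kwargs):
    """Create a dense array in ``group``, preferring the zarr v3 spelling
    ``create_array`` and falling back to ``create_dataset`` (h5py, zarr 2)."""
    factory = getattr(group, "create_array", None) or group.create_dataset
    return factory(key, shape=shape, dtype=dtype, **dataset_kwargs)
```

A duck-typed dispatch like this avoids importing zarr at call time, at the cost of silently using `create_array` on any object that happens to define it; the explicit `isinstance`/version check in the hunk above makes the intent more legible.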