From be4be30f9ad7779179b1bc64b92dce8c8a0e982d Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 09:41:29 +0100 Subject: [PATCH 01/22] (fix): lazy chunking respects -1 --- src/anndata/_io/specs/lazy_methods.py | 2 +- src/anndata/_io/specs/registry.py | 6 ++++++ tests/test_io_elementwise.py | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index 48770be9c..a952de37b 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -105,7 +105,7 @@ def read_sparse_as_dask( if chunks is not None: if len(chunks) != 2: raise ValueError("`chunks` must be a tuple of two integers") - if chunks[minor_dim] != shape[minor_dim]: + if chunks[minor_dim] not in {shape[minor_dim], -1}: raise ValueError( "Only the major axis can be chunked. " f"Try setting chunks to {((-1, _DEFAULT_STRIDE) if is_csc else (_DEFAULT_STRIDE, -1))}" diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 3b43def7c..37484ee3d 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -451,6 +451,12 @@ def read_elem_as_dask( ... g["X"], chunks=(500, adata.shape[1]) ... ) >>> adata.layers["dense"] = ad.experimental.read_elem_as_dask(g["layers/dense"]) + + We also support using -1 as a chunk size to signfiy the reading the whole axis: + + >>> >>> adata.X = ad.experimental.read_elem_as_dask( + ... g["X"], chunks=(500, -1) + ... ) """ return DaskReader(_LAZY_REGISTRY).read_elem(elem, chunks=chunks) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index e46cd7d81..b1898926b 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -284,6 +284,7 @@ def test_read_lazy_2d_dask(sparse_format, store): (2, (200, 400)), (1, None), (2, None), + (2, (400, -1)), ], ) def test_read_lazy_subsets_nd_dask(store, n_dims, chunks): @@ -323,6 +324,8 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path): ("csr", (10, SIZE * 2)), ("csc", None), ("csr", None), + ("csr", (10, -1)), + ("csc", (-1, 10)), ], ) def test_read_lazy_2d_chunk_kwargs(store, arr_type, chunks): From 2115298cc7aa390ddbf79697f2503c8bab0170ff Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 09:42:09 +0100 Subject: [PATCH 02/22] (fix): cache arrays in `BaseCompressedSparseDataset` --- src/anndata/_core/sparse_dataset.py | 29 ++++++++++---- src/anndata/_io/specs/lazy_methods.py | 38 ++++++++++--------- src/anndata/tests/helpers.py | 2 +- tests/test_backed_sparse.py | 54 +++++++++++++++++++-------- 4 files changed, 82 insertions(+), 41 deletions(-) diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index ae6b47c7f..a051f5ea1 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -380,7 +380,7 @@ def backend(self) -> Literal["zarr", "hdf5"]: @property def dtype(self) -> np.dtype: """The :class:`numpy.dtype` of the `data` attribute of the sparse matrix.""" - return self.group["data"].dtype + return self._data.dtype @classmethod def _check_group_format(cls, group): @@ -546,8 +546,9 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None sparse_matrix.indptr[1:].astype(np.int64) + indptr_offset ) # Clear cached property - if hasattr(self, "indptr"): - del self._indptr + for attr in ["_indptr", "_indices", "_data"]: + if hasattr(self, attr): + delattr(self, attr) # indices indices = self.group["indices"] @@ -565,11 +566,25 @@ 
def _indptr(self) -> np.ndarray: arr = self.group["indptr"][...] return arr + @cached_property + def _indices(self) -> np.ndarray: + """\ + Cache access to the indices to prevent unnecessary reads of the zarray + """ + return self.group["indices"] + + @cached_property + def _data(self) -> np.ndarray: + """\ + Cache access to the data to prevent unnecessary reads of the zarray + """ + return self.group["data"] + def _to_backed(self) -> BackedSparseMatrix: format_class = get_backed_class(self.format) mtx = format_class(self.shape, dtype=self.dtype) - mtx.data = self.group["data"] - mtx.indices = self.group["indices"] + mtx.data = self._data + mtx.indices = self._indices mtx.indptr = self._indptr return mtx @@ -578,8 +593,8 @@ def to_memory(self) -> ss.csr_matrix | ss.csc_matrix | SpArray: self.format, use_sparray_in_io=settings.use_sparse_array_on_read ) mtx = format_class(self.shape, dtype=self.dtype) - mtx.data = self.group["data"][...] - mtx.indices = self.group["indices"][...] + mtx.data = self._data[...] + mtx.indices = self._indices[...] mtx.indptr = self._indptr return mtx diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index a952de37b..581539b15 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -10,17 +10,17 @@ from scipy import sparse import anndata as ad +from anndata.abc import CSCDataset, CSRDataset from ..._core.file_backing import filename, get_elem_name from ...compat import H5Array, H5Group, ZarrArray, ZarrGroup from .registry import _LAZY_REGISTRY, IOSpec if TYPE_CHECKING: - from collections.abc import Callable, Generator, Mapping, Sequence + from collections.abc import Generator, Mapping, Sequence from typing import Literal, ParamSpec, TypeVar - from ..._core.sparse_dataset import _CSCDataset, _CSRDataset - from ..._types import ArrayStorageType, StorageType + from ..._types import StorageType from ...compat import DaskArray from .registry import DaskReader @@ -35,12 +35,12 @@ @contextmanager def maybe_open_h5( - path_or_group: Path | ZarrGroup, elem_name: str + path_or_dataset: Path | CSRDataset | CSCDataset, elem_name: str ) -> Generator[StorageType, None, None]: - if not isinstance(path_or_group, Path): - yield path_or_group + if not isinstance(path_or_dataset, Path): + yield path_or_dataset return - file = h5py.File(path_or_group, "r") + file = h5py.File(path_or_dataset, "r") try: yield file[elem_name] finally: @@ -61,20 +61,17 @@ def compute_chunk_layout_for_axis_shape( def make_dask_chunk( - path_or_group: Path | ZarrGroup, + path_or_sparse_dataset: Path | CSRDataset | CSCDataset, elem_name: str, block_info: BlockInfo | None = None, - *, - wrap: Callable[[ArrayStorageType], ArrayStorageType] - | Callable[[H5Group | ZarrGroup], _CSRDataset | _CSCDataset] = lambda g: g, ): if block_info is None: msg = "Block info is required" raise ValueError(msg) # We need to open the file in each task since `dask` cannot share h5py objects when using `dask.distributed` # https://github.com/scverse/anndata/issues/1105 - with maybe_open_h5(path_or_group, elem_name) as f: - mtx = wrap(f) + with maybe_open_h5(path_or_sparse_dataset, elem_name) as f: + mtx = ad.io.sparse_dataset(f) if isinstance(f, H5Group) else f idx = tuple( slice(start, stop) for start, stop in block_info[None]["array-location"] ) @@ -94,10 +91,17 @@ def read_sparse_as_dask( ) -> DaskArray: import dask.array as da - path_or_group = Path(filename(elem)) if isinstance(elem, H5Group) else elem + path_or_sparse_dataset = ( + 
Path(filename(elem)) + if isinstance(elem, H5Group) + else ad.io.sparse_dataset(elem) + ) elem_name = get_elem_name(elem) shape: tuple[int, int] = tuple(elem.attrs["shape"]) - dtype = elem["data"].dtype + if isinstance(path_or_sparse_dataset, CSRDataset | CSCDataset): + dtype = path_or_sparse_dataset.dtype + else: + dtype = elem["data"].dtype is_csc: bool = elem.attrs["encoding-type"] == "csc_matrix" stride: int = _DEFAULT_STRIDE @@ -119,9 +123,7 @@ def read_sparse_as_dask( (chunks_minor, chunks_major) if is_csc else (chunks_major, chunks_minor) ) memory_format = sparse.csc_matrix if is_csc else sparse.csr_matrix - make_chunk = partial( - make_dask_chunk, path_or_group, elem_name, wrap=ad.io.sparse_dataset - ) + make_chunk = partial(make_dask_chunk, path_or_sparse_dataset, elem_name) da_mtx = da.map_blocks( make_chunk, dtype=dtype, diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 6ed637ed8..8b0226634 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -1057,7 +1057,7 @@ class AccessTrackingStore(DirectoryStore): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._access_count = Counter() - self._accessed_keys = {} + self._accessed_keys = dict() def __getitem__(self, key: str) -> object: for tracked in self._access_count: diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 2778c76bb..fb97c69c2 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -13,7 +13,8 @@ import anndata as ad from anndata._core.anndata import AnnData from anndata._core.sparse_dataset import sparse_dataset -from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray +from anndata._io.specs.registry import read_elem_as_dask +from anndata.compat import CAN_USE_SPARSE_ARRAY, DaskArray, SpArray from anndata.experimental import read_dispatched from anndata.tests.helpers import AccessTrackingStore, assert_equal, subset_func @@ -26,6 +27,9 @@ from numpy.typing import ArrayLike, NDArray from pytest_mock import MockerFixture + from anndata.abc import CSCDataset, CSRDataset + from anndata.compat import ZarrGroup + Idx = slice | int | NDArray[np.integer] | NDArray[np.bool_] @@ -354,24 +358,33 @@ def test_dataset_append_disk( @pytest.mark.parametrize("sparse_format", [sparse.csr_matrix, sparse.csc_matrix]) -def test_indptr_cache( +def test_lazy_array_cache( tmp_path: Path, sparse_format: Callable[[ArrayLike], sparse.spmatrix], ): + elems = {"indptr", "indices", "data"} path = tmp_path / "test.zarr" a = sparse_format(sparse.random(10, 10)) f = zarr.open_group(path, "a") ad.io.write_elem(f, "X", a) store = AccessTrackingStore(path) - store.initialize_key_trackers(["X/indptr"]) + for elem in elems: + store.initialize_key_trackers([f"X/{elem}"]) f = zarr.open_group(store, "a") a_disk = sparse_dataset(f["X"]) a_disk[:1] a_disk[3:5] a_disk[6:7] a_disk[8:9] - # one each for .zarray and actual access assert store.get_access_count("X/indptr") == 2 + for elem_not_indptr in elems - {"indptr"}: + assert ( + sum( + ".zarray" in key_accessed + for key_accessed in store.get_accessed_keys(f"X/{elem_not_indptr}") + ) + == 1 + ) Kind = Literal["slice", "int", "array", "mask"] @@ -421,27 +434,38 @@ def width_idx_kinds( ( [0], slice(None, None), - ["X/data/.zarray", "X/data/.zarray", "X/data/0"], + ["X/data/.zarray", "X/data/0"], ), ( [0], slice(None, 3), - ["X/data/.zarray", "X/data/.zarray", "X/data/0"], + ["X/data/.zarray", "X/data/0"], ), ( [3, 4, 5], slice(None, None), - ["X/data/.zarray", "X/data/.zarray", "X/data/3", 
"X/data/4", "X/data/5"], + ["X/data/.zarray", "X/data/3", "X/data/4", "X/data/5"], ), l=10, ), ) +@pytest.mark.parametrize( + "open_func", + [ + sparse_dataset, + lambda x: read_elem_as_dask( + x, chunks=(1, -1) if x.attrs["encoding-type"] == "csr_matrix" else (-1, 1) + ), + ], + ids=["sparse_dataset", "read_elem_as_dask"], +) def test_data_access( tmp_path: Path, sparse_format: Callable[[ArrayLike], sparse.spmatrix], idx_maj: Idx, idx_min: Idx, exp: Sequence[str], + open_func: Callable[[ZarrGroup], CSRDataset | CSCDataset | DaskArray], ): path = tmp_path / "test.zarr" a = sparse_format(np.eye(10, 10)) @@ -454,19 +478,19 @@ def test_data_access( store = AccessTrackingStore(path) store.initialize_key_trackers(["X/data"]) f = zarr.open_group(store) - a_disk = sparse_dataset(f["X"]) - - # Do the slicing with idx - store.reset_key_trackers() - if a_disk.format == "csr": - a_disk[idx_maj, idx_min] + a_disk = AnnData(X=open_func(f["X"])) + if a.format == "csr": + subset = a_disk[idx_maj, idx_min] else: - a_disk[idx_min, idx_maj] + subset = a_disk[idx_min, idx_maj] + if isinstance(subset.X, DaskArray): + subset.X.compute(scheduler="single-threaded") assert store.get_access_count("X/data") == len(exp), store.get_accessed_keys( "X/data" ) - assert store.get_accessed_keys("X/data") == exp + # dask access order is not guaranteed so need to sort + assert sorted(store.get_accessed_keys("X/data")) == sorted(exp) @pytest.mark.parametrize( From 2edabe2d947643da491df75205f7bec167c5080e Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 10:13:53 +0100 Subject: [PATCH 03/22] (fix): clean up typing --- src/anndata/_io/specs/lazy_methods.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index 581539b15..2c23f2a85 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from functools import partial from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, overload import h5py import numpy as np @@ -18,10 +18,9 @@ if TYPE_CHECKING: from collections.abc import Generator, Mapping, Sequence - from typing import Literal, ParamSpec, TypeVar + from typing import Any, Literal, ParamSpec, TypeVar - from ..._types import StorageType - from ...compat import DaskArray + from ...compat import DaskArray, H5File from .registry import DaskReader BlockInfo = Mapping[ @@ -31,12 +30,21 @@ P = ParamSpec("P") R = TypeVar("R") + D = TypeVar("D", bound=CSCDataset | CSRDataset) +@overload +@contextmanager +def maybe_open_h5(path_or_dataset: D, elem_name: str) -> Generator[D, None, None]: ... +@overload +@contextmanager +def maybe_open_h5( + path_or_dataset: Path, elem_name: str +) -> Generator[H5File, None, None]: ... 
@contextmanager def maybe_open_h5( - path_or_dataset: Path | CSRDataset | CSCDataset, elem_name: str -) -> Generator[StorageType, None, None]: + path_or_dataset: Any, elem_name: str +) -> Generator[H5File | D, None, None]: if not isinstance(path_or_dataset, Path): yield path_or_dataset return From 286011649fb8e962fd152b751c138f0de1aed7f9 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 10:14:20 +0100 Subject: [PATCH 04/22] (fix): doctest double >>> --- src/anndata/_io/specs/registry.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 37484ee3d..c863038fc 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -454,9 +454,7 @@ def read_elem_as_dask( We also support using -1 as a chunk size to signfiy the reading the whole axis: - >>> >>> adata.X = ad.experimental.read_elem_as_dask( - ... g["X"], chunks=(500, -1) - ... ) + >>> adata.X = ad.experimental.read_elem_as_dask(g["X"], chunks=(500, -1)) """ return DaskReader(_LAZY_REGISTRY).read_elem(elem, chunks=chunks) From fa96348d35c4de7188acb13c0d136f262ec1f4d9 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 10:14:34 +0100 Subject: [PATCH 05/22] (chore): add tests --- tests/test_io_elementwise.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index b1898926b..65de0469d 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -335,12 +335,21 @@ def test_read_lazy_2d_chunk_kwargs(store, arr_type, chunks): else: arr_store = create_sparse_store(arr_type, store) X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks) - if chunks is not None: - assert X_dask_from_disk.chunksize == chunks + if arr_type != "dense": + if chunks is not None: + expected_chunks = chunks + if -1 in expected_chunks: + missing_axis = expected_chunks.index(-1) + missing_aixs_size_size = SIZE * (1 + missing_axis) + expected_chunks = list(expected_chunks) + expected_chunks[missing_axis] = missing_aixs_size_size + expected_chunks = tuple(expected_chunks) + assert X_dask_from_disk.chunksize == expected_chunks + # assert that sparse chunks are set correctly by default + minor_index = int(arr_type == "csr") + assert X_dask_from_disk.chunksize[minor_index] == SIZE * (1 + minor_index) else: - minor_index = int(arr_type == "csr") - # assert that sparse chunks are set correctly by default - assert X_dask_from_disk.chunksize[minor_index] == SIZE * (1 + minor_index) + assert X_dask_from_disk.chunksize == chunks X_from_disk = read_elem(arr_store["X"]) assert_equal(X_from_disk, X_dask_from_disk) From a0e2d52477a854198df110c2547b3070b967c8c8 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 10:17:07 +0100 Subject: [PATCH 06/22] (fix): more typing updates --- src/anndata/_io/specs/lazy_methods.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index 2c23f2a85..e058221f6 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -20,7 +20,7 @@ from collections.abc import Generator, Mapping, Sequence from typing import Any, Literal, ParamSpec, TypeVar - from ...compat import DaskArray, H5File + from ...compat import DaskArray, H5File, SpArray from .registry import DaskReader BlockInfo = Mapping[ @@ -69,10 +69,10 @@ def compute_chunk_layout_for_axis_shape( 
def make_dask_chunk(
-    path_or_sparse_dataset: Path | CSRDataset | CSCDataset,
+    path_or_sparse_dataset: Path | D,
     elem_name: str,
     block_info: BlockInfo | None = None,
-):
+) -> sparse.csr_matrix | sparse.csc_matrix | SpArray:
     if block_info is None:
         msg = "Block info is required"
         raise ValueError(msg)

From 32fbef9f7b80674119047ea42710a839e3a9cdae Mon Sep 17 00:00:00 2001
From: ilan-gold
Date: Fri, 8 Nov 2024 10:22:24 +0100
Subject: [PATCH 09/22] (fix): spelling

---
 src/anndata/_io/specs/registry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py
index c863038fc..338e4e596 100644
--- a/src/anndata/_io/specs/registry.py
+++ b/src/anndata/_io/specs/registry.py
@@ -452,7 +452,7 @@ def read_elem_as_dask(
     ...
) >>> adata.layers["dense"] = ad.experimental.read_elem_as_dask(g["layers/dense"]) - We also support using -1 as a chunk size to signfiy the reading the whole axis: + We also support using -1 as a chunk size to signify the reading the whole axis: >>> adata.X = ad.experimental.read_elem_as_dask(g["X"], chunks=(500, -1)) """ From ceb70b44ef6b5fcdeff996be81ba8f651a008f4f Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 10:30:02 +0100 Subject: [PATCH 10/22] (chore): release note --- docs/release-notes/1743.bugfix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/release-notes/1743.bugfix.md diff --git a/docs/release-notes/1743.bugfix.md b/docs/release-notes/1743.bugfix.md new file mode 100644 index 000000000..f8f489aff --- /dev/null +++ b/docs/release-notes/1743.bugfix.md @@ -0,0 +1 @@ +Fix chunking with -1 in `chunks` argument of {func}`~anndata.experimental.read_elem_as_dask` {user}`ilan-gold` From fedd82721152e352e541dc6bb7a9bc0772c8970c Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 10:32:47 +0100 Subject: [PATCH 11/22] (chore): release note --- docs/release-notes/1744.bugfix.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 docs/release-notes/1744.bugfix.md diff --git a/docs/release-notes/1744.bugfix.md b/docs/release-notes/1744.bugfix.md new file mode 100644 index 000000000..18023bb16 --- /dev/null +++ b/docs/release-notes/1744.bugfix.md @@ -0,0 +1,2 @@ + +Cache accesses to the `data` and `indices` arrays in {class}`~anndata.abc.CSRDataset` and {class}`~anndata.abc.CSCDataset` {user}`ilan-gold` From 5960331a31584807c725d8a1db277673fdfc2b57 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 11:25:43 +0100 Subject: [PATCH 12/22] (fix): support `None` and `-1` --- src/anndata/_io/specs/lazy_methods.py | 12 ++++++-- src/anndata/_io/specs/registry.py | 4 ++- tests/test_io_elementwise.py | 41 +++++++++++---------------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index a952de37b..089730fa3 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -105,12 +105,16 @@ def read_sparse_as_dask( if chunks is not None: if len(chunks) != 2: raise ValueError("`chunks` must be a tuple of two integers") - if chunks[minor_dim] not in {shape[minor_dim], -1}: + if chunks[minor_dim] not in {shape[minor_dim], -1, None}: raise ValueError( "Only the major axis can be chunked. " f"Try setting chunks to {((-1, _DEFAULT_STRIDE) if is_csc else (_DEFAULT_STRIDE, -1))}" ) - stride = chunks[major_dim] + stride = ( + chunks[major_dim] + if chunks[major_dim] not in {None, -1} + else shape[major_dim] + ) shape_minor, shape_major = shape if is_csc else shape[::-1] chunks_major = compute_chunk_layout_for_axis_shape(stride, shape_major) @@ -142,7 +146,9 @@ def read_h5_array( shape = tuple(elem.shape) dtype = elem.dtype chunks: tuple[int, ...] 
= (
-        chunks if chunks is not None else (_DEFAULT_STRIDE,) * len(shape)
+        tuple(c if c not in {None, -1} else shape[i] for i, c in enumerate(chunks))
+        if chunks is not None
+        else (_DEFAULT_STRIDE,) * len(shape)
     )
 
     chunk_layout = tuple(
diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py
index 338e4e596..ca13f8e59 100644
--- a/src/anndata/_io/specs/registry.py
+++ b/src/anndata/_io/specs/registry.py
@@ -398,6 +398,7 @@ def read_elem_as_dask(
         Defaults to `(1000, adata.shape[1])` for CSR sparse,
         `(adata.shape[0], 1000)` for CSC sparse,
         and the on-disk chunking otherwise for dense.
+        Can use `-1` or `None` to indicate use of the size of the corresponding dimension.
 
     Returns
     -------
@@ -452,9 +453,10 @@ def read_elem_as_dask(
     ... )
     >>> adata.layers["dense"] = ad.experimental.read_elem_as_dask(g["layers/dense"])
 
-    We also support using -1 as a chunk size to signify the reading the whole axis:
+    We also support using -1 and None as a chunk size to signify reading the whole axis:
 
     >>> adata.X = ad.experimental.read_elem_as_dask(g["X"], chunks=(500, -1))
+    >>> adata.X = ad.experimental.read_elem_as_dask(g["X"], chunks=(500, None))
     """
     return DaskReader(_LAZY_REGISTRY).read_elem(elem, chunks=chunks)

diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py
index 65de0469d..d6dc0033b 100644
--- a/tests/test_io_elementwise.py
+++ b/tests/test_io_elementwise.py
@@ -285,6 +285,7 @@ def test_read_lazy_2d_dask(sparse_format, store):
         (1, None),
         (2, None),
         (2, (400, -1)),
+        (2, (400, None)),
     ],
 )
 def test_read_lazy_subsets_nd_dask(store, n_dims, chunks):
@@ -317,39 +318,31 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path):
 
 
 @pytest.mark.parametrize(
-    ("arr_type", "chunks"),
+    ("arr_type", "chunks", "expected_chunksize"),
     [
-        ("dense", (100, 100)),
-        ("csc", (SIZE, 10)),
-        ("csr", (10, SIZE * 2)),
-        ("csc", None),
-        ("csr", None),
-        ("csr", (10, -1)),
-        ("csc", (-1, 10)),
+        ("dense", (100, 100), (100, 100)),
+        ("csc", (SIZE, 10), (SIZE, 10)),
+        ("csr", (10, SIZE * 2), (10, SIZE * 2)),
+        ("csc", None, (SIZE, 1000)),
+        ("csr", None, (1000, SIZE * 2)),
+        ("csr", (10, -1), (10, SIZE * 2)),
+        ("csc", (-1, 10), (SIZE, 10)),
+        ("csr", (10, None), (10, SIZE * 2)),
+        ("csc", (None, 10), (SIZE, 10)),
+        ("csc", (None, None), (SIZE, SIZE * 2)),
+        ("csr", (None, None), (SIZE, SIZE * 2)),
+        ("csr", (-1, -1), (SIZE, SIZE * 2)),
+        ("csc", (-1, -1), (SIZE, SIZE * 2)),
     ],
 )
-def test_read_lazy_2d_chunk_kwargs(store, arr_type, chunks):
+def test_read_lazy_2d_chunk_kwargs(store, arr_type, chunks, expected_chunksize):
     if arr_type == "dense":
         arr_store = create_dense_store(store)
         X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks)
     else:
         arr_store = create_sparse_store(arr_type, store)
         X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks)
-    if arr_type != "dense":
-        if chunks is not None:
-            expected_chunks = chunks
-            if -1 in expected_chunks:
-                missing_axis = expected_chunks.index(-1)
-                missing_aixs_size_size = SIZE * (1 + missing_axis)
-                expected_chunks = list(expected_chunks)
-                expected_chunks[missing_axis] = missing_aixs_size_size
-                expected_chunks = tuple(expected_chunks)
-            assert X_dask_from_disk.chunksize == expected_chunks
-        # assert that sparse chunks are set correctly by default
-        minor_index = int(arr_type == "csr")
-        assert X_dask_from_disk.chunksize[minor_index] == SIZE * (1 + minor_index)
-    else:
-        assert X_dask_from_disk.chunksize == chunks
+    assert X_dask_from_disk.chunksize == expected_chunksize
     X_from_disk =
read_elem(arr_store["X"]) assert_equal(X_from_disk, X_dask_from_disk) From e652c449c9c219814cb5554b32969c8d22023633 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 12:24:02 +0100 Subject: [PATCH 13/22] (chore): typing --- tests/test_io_elementwise.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index d6dc0033b..3ca5324b8 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -335,7 +335,12 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path): ("csc", (-1, -1), (SIZE, SIZE * 2)), ], ) -def test_read_lazy_2d_chunk_kwargs(store, arr_type, chunks, expected_chunksize): +def test_read_lazy_2d_chunk_kwargs( + store: H5Group | ZarrGroup, + arr_type: Literal["csr", "csc", "dense"], + chunks: None | tuple[int | None, int | None], + expected_chunksize: tuple[int, int], +): if arr_type == "dense": arr_store = create_dense_store(store) X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks) From 59849a85b77442d0979fbdb15c375eae4edddf5f Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 12:42:30 +0100 Subject: [PATCH 14/22] (chore): add cache bust test --- src/anndata/_core/sparse_dataset.py | 9 +++++---- tests/test_backed_sparse.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index a051f5ea1..15335f21f 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -545,10 +545,6 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None indptr[orig_data_size:] = ( sparse_matrix.indptr[1:].astype(np.int64) + indptr_offset ) - # Clear cached property - for attr in ["_indptr", "_indices", "_data"]: - if hasattr(self, attr): - delattr(self, attr) # indices indices = self.group["indices"] @@ -556,6 +552,11 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None indices.resize((orig_data_size + sparse_matrix.indices.shape[0],)) indices[orig_data_size:] = sparse_matrix.indices + # Clear cached property + for attr in ["_indptr", "_indices", "_data"]: + if hasattr(self, attr): + delattr(self, attr) + @cached_property def _indptr(self) -> np.ndarray: """\ diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index fb97c69c2..8d7a32a38 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -285,6 +285,25 @@ def test_dataset_append_memory( assert_equal(fromdisk, frommem) +def test_append_array_cache_bust(tmp_path: Path, diskfmt): + path = tmp_path / f"test.{diskfmt.replace('ad', '')}" + a = sparse.random(100, 100, format="csr") + if diskfmt == "zarr": + f = zarr.open_group(path, "a") + else: + f = h5py.File(path, "a") + ad.io.write_elem(f, "mtx", a) + ad.io.write_elem(f, "mtx_2", a) + diskmtx = sparse_dataset(f["mtx"]) + old_array_shapes = {} + array_names = ["indptr", "indices", "data"] + for name in array_names: + old_array_shapes[name] = getattr(diskmtx, f"_{name}").shape + diskmtx.append(sparse_dataset(f["mtx_2"])) + for name in array_names: + assert old_array_shapes[name] != getattr(diskmtx, f"_{name}").shape + + @pytest.mark.parametrize("sparse_format", [sparse.csr_matrix, sparse.csc_matrix]) @pytest.mark.parametrize( ("subset_func", "subset_func2"), From 41bd62e33e466e0f67a6e41e574e708d597274f2 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 12:42:54 +0100 Subject: [PATCH 15/22] (chore): type --- 
tests/test_backed_sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 8d7a32a38..1fc7874f8 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -285,7 +285,7 @@ def test_dataset_append_memory( assert_equal(fromdisk, frommem) -def test_append_array_cache_bust(tmp_path: Path, diskfmt): +def test_append_array_cache_bust(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]): path = tmp_path / f"test.{diskfmt.replace('ad', '')}" a = sparse.random(100, 100, format="csr") if diskfmt == "zarr": From 0304d31cf79212153ad7c6d436834e3e5b822dad Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 17:05:23 +0100 Subject: [PATCH 16/22] (chore): types --- src/anndata/_core/sparse_dataset.py | 5 +++-- src/anndata/_io/specs/lazy_methods.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index 15335f21f..a155352af 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -38,6 +38,7 @@ from scipy.sparse._compressed import _cs_matrix from .._types import GroupStorageType + from ..compat import H5Array from .index import Index else: from scipy.sparse import spmatrix as _cs_matrix @@ -568,14 +569,14 @@ def _indptr(self) -> np.ndarray: return arr @cached_property - def _indices(self) -> np.ndarray: + def _indices(self) -> H5Array | ZarrArray: """\ Cache access to the indices to prevent unnecessary reads of the zarray """ return self.group["indices"] @cached_property - def _data(self) -> np.ndarray: + def _data(self) -> H5Array | ZarrArray: """\ Cache access to the data to prevent unnecessary reads of the zarray """ diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index a3b361778..bf7f54522 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -30,7 +30,7 @@ P = ParamSpec("P") R = TypeVar("R") - D = TypeVar("D", bound=CSCDataset | CSRDataset) + D = TypeVar("D") @overload From 76ecda590d7f2c4ffd1a2a36f7e8d100aec382eb Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 17:06:06 +0100 Subject: [PATCH 17/22] (chore): better name --- src/anndata/_io/specs/lazy_methods.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index bf7f54522..1177f19af 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -35,20 +35,20 @@ @overload @contextmanager -def maybe_open_h5(path_or_dataset: D, elem_name: str) -> Generator[D, None, None]: ... +def maybe_open_h5(path_or_other: D, elem_name: str) -> Generator[D, None, None]: ... @overload @contextmanager def maybe_open_h5( - path_or_dataset: Path, elem_name: str + path_or_other: Path, elem_name: str ) -> Generator[H5File, None, None]: ... 
@contextmanager def maybe_open_h5( - path_or_dataset: Any, elem_name: str + path_or_other: Any, elem_name: str ) -> Generator[H5File | D, None, None]: - if not isinstance(path_or_dataset, Path): - yield path_or_dataset + if not isinstance(path_or_other, Path): + yield path_or_other return - file = h5py.File(path_or_dataset, "r") + file = h5py.File(path_or_other, "r") try: yield file[elem_name] finally: From e538a12a3d9b830a4302daac4af360c9d51eddcd Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 17:08:26 +0100 Subject: [PATCH 18/22] (Fix): overload type --- src/anndata/_io/specs/lazy_methods.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index 1177f19af..738caf488 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: from collections.abc import Generator, Mapping, Sequence - from typing import Any, Literal, ParamSpec, TypeVar + from typing import Literal, ParamSpec, TypeVar from ...compat import DaskArray, H5File, SpArray from .registry import DaskReader @@ -33,17 +33,17 @@ D = TypeVar("D") -@overload -@contextmanager -def maybe_open_h5(path_or_other: D, elem_name: str) -> Generator[D, None, None]: ... @overload @contextmanager def maybe_open_h5( path_or_other: Path, elem_name: str ) -> Generator[H5File, None, None]: ... +@overload +@contextmanager +def maybe_open_h5(path_or_other: D, elem_name: str) -> Generator[D, None, None]: ... @contextmanager def maybe_open_h5( - path_or_other: Any, elem_name: str + path_or_other: H5File | D, elem_name: str ) -> Generator[H5File | D, None, None]: if not isinstance(path_or_other, Path): yield path_or_other From b51321749831b6dce7f0f0dabf0ee7446fab2d2e Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 8 Nov 2024 17:10:12 +0100 Subject: [PATCH 19/22] (chore): bring back test comment --- tests/test_backed_sparse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 1fc7874f8..03155d0a3 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -395,6 +395,7 @@ def test_lazy_array_cache( a_disk[3:5] a_disk[6:7] a_disk[8:9] + # one each for .zarray and actual access assert store.get_access_count("X/indptr") == 2 for elem_not_indptr in elems - {"indptr"}: assert ( From d2d9f55a433bd6e40dbde49d880774c5a7939feb Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 11 Nov 2024 17:05:49 +0100 Subject: [PATCH 20/22] Update 1744.bugfix.md --- docs/release-notes/1744.bugfix.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/release-notes/1744.bugfix.md b/docs/release-notes/1744.bugfix.md index 18023bb16..2a425d43c 100644 --- a/docs/release-notes/1744.bugfix.md +++ b/docs/release-notes/1744.bugfix.md @@ -1,2 +1 @@ - Cache accesses to the `data` and `indices` arrays in {class}`~anndata.abc.CSRDataset` and {class}`~anndata.abc.CSCDataset` {user}`ilan-gold` From 3dc0ddd8d5147b4036295297bfb8244856433e89 Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Thu, 21 Nov 2024 10:33:35 +0100 Subject: [PATCH 21/22] (fix): revert erroneous change --- src/anndata/tests/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 8b0226634..6ed637ed8 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -1057,7 +1057,7 @@ class AccessTrackingStore(DirectoryStore): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._access_count = Counter() - self._accessed_keys = dict() + self._accessed_keys = {} def __getitem__(self, key: str) -> object: for tracked in self._access_count: From 1dcf7adcaa5bf2be26bf70f8615aecb2692feb0b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 21 Nov 2024 11:10:49 +0100 Subject: [PATCH 22/22] (fix): dont generate coo matrices --- tests/test_concatenate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index e034debd2..d9f399dd6 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -1044,7 +1044,9 @@ def gen_list(n): def gen_sparse(n): - return sparse.random(np.random.randint(1, 100), np.random.randint(1, 100)) + return sparse.random( + np.random.randint(1, 100), np.random.randint(1, 100), format="csr" + ) def gen_something(n):
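
Below is a short, self-contained sketch of the chunk-size semantics the series above converges on. `ad.experimental.read_elem_as_dask` and `ad.io.write_elem` are the APIs shown in the diffs; the store path and the `normalize_chunks` helper are illustrative assumptions rather than anndata API, and the helper mirrors only the dense code path — sparse datasets additionally require the minor axis (columns for CSR, rows for CSC) to remain unchunked.

    import anndata as ad
    import zarr

    _DEFAULT_STRIDE = 1000  # default major-axis stride used by the lazy readers

    def normalize_chunks(chunks, shape):
        # Resolve -1/None entries to the full axis length, as patches 01 and 12
        # do for dense arrays; None means "use the default stride everywhere".
        if chunks is None:
            return (_DEFAULT_STRIDE,) * len(shape)
        return tuple(s if c in (None, -1) else c for c, s in zip(chunks, shape))

    # -1 and None both mean "read the whole axis":
    assert normalize_chunks((500, -1), (10_000, 2_000)) == (500, 2_000)
    assert normalize_chunks((500, None), (10_000, 2_000)) == (500, 2_000)
    assert normalize_chunks(None, (10_000, 2_000)) == (1_000, 1_000)

    # Against a store previously written with ad.io.write_elem, e.g. a CSR
    # matrix encoded under "X" (the path here is hypothetical):
    g = zarr.open_group("adata.zarr", mode="r")
    X = ad.experimental.read_elem_as_dask(g["X"], chunks=(500, -1))
    assert X.chunksize[1] == X.shape[1]  # -1 resolved to the whole minor axis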