Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat): raising errors where backed is not supported #3048

Merged
merged 40 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
87450d0
(feat): first step raising errors where `backed` is not supported
ilan-gold May 8, 2024
b6e13eb
(fix): fix `chunked` with `log1p`
ilan-gold May 8, 2024
75a703b
(fix): complications in `log1p` with `chunked`
ilan-gold May 8, 2024
4a654d1
Merge branch 'main' into ig/backed_not_implemented
flying-sheep May 13, 2024
a80dece
remove duplicated import
flying-sheep May 13, 2024
4c22705
(feat): pca check
ilan-gold May 13, 2024
b3c4bb7
Merge branch 'ig/backed_not_implemented' of github.com:scverse/scanpy…
ilan-gold May 13, 2024
cb6116b
(feat): `ingest` check added
ilan-gold May 13, 2024
5b7a3e9
(feat): add `dendrogram` test
ilan-gold May 13, 2024
53e68c1
(feat): add `tsne` check
ilan-gold May 13, 2024
cfa8d57
(feat): `rank_genes_groups`
ilan-gold May 13, 2024
138c9be
(feat): `score_genes` check
ilan-gold May 13, 2024
d68ee47
(chore): add plotting backed tests
ilan-gold May 13, 2024
6632910
Merge branch 'main' into ig/backed_not_implemented
ilan-gold May 14, 2024
788a4cb
(chore): check type instead of `isbacked`
ilan-gold May 14, 2024
06ab4f1
Merge branch 'ig/backed_not_implemented' of github.com:scverse/scanpy…
ilan-gold May 14, 2024
dafda50
(chore): release note
ilan-gold May 14, 2024
786f096
(fix): string formatting error
ilan-gold May 14, 2024
4eb3487
(fix): `worker_id` default
ilan-gold May 14, 2024
4f78c23
Merge branch 'main' into ig/backed_not_implemented
ilan-gold May 14, 2024
3a36196
(fix): try `SparseDataset`
ilan-gold May 14, 2024
9480b1c
Merge branch 'ig/backed_not_implemented' of github.com:scverse/scanpy…
ilan-gold May 14, 2024
8af759a
(fix): correct fixture creation
ilan-gold May 14, 2024
13633c6
Apply suggestions from code review
ilan-gold May 14, 2024
99968db
(chore): consolidate tests
ilan-gold May 14, 2024
9fc9ab0
(chore): remove erroneous plotting tests
ilan-gold May 14, 2024
27c3d08
(chore): remove other `tempfile` import
ilan-gold May 14, 2024
218067a
Merge branch 'main' into ig/backed_not_implemented
ilan-gold May 15, 2024
01a0bcf
(fix): try moving scanpy install
ilan-gold May 15, 2024
7e161bf
(fix): `np.Inf` -> `np.inf`
ilan-gold May 15, 2024
88d51ab
(fix): fail if pynn is newest
ilan-gold May 16, 2024
e4ebaf7
(fix): name
ilan-gold May 16, 2024
037b77f
(fix): `score_genes` name
ilan-gold May 16, 2024
0d96e89
Merge branch 'main' into ig/backed_not_implemented
flying-sheep May 17, 2024
e2e9252
revert 88d51ab
flying-sheep May 17, 2024
54ee86f
session-scoped backed_adata
flying-sheep May 17, 2024
2e8ba99
Merge branch 'main' into ig/backed_not_implemented
ilan-gold May 17, 2024
6b03ac8
(fix): move umap import back to top
ilan-gold May 17, 2024
352166b
(fix): try block shape
ilan-gold May 17, 2024
b0ca228
(fix): revert chunks fix
ilan-gold May 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/1.10.2.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
```

* Compatibility with `matplotlib` 3.9 {pr}`2999` {smaller}`I Virshup`
* Add clear errors where `backed` mode-like matrices (i.e., from `sparse_dataset`) are not supported {pr}`3048` {smaller}`I gold`

```{rubric} Performance
```
Expand Down
19 changes: 19 additions & 0 deletions scanpy/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from typing import TYPE_CHECKING, overload
from weakref import WeakSet

import h5py
import numpy as np
from anndata import __version__ as anndata_version
from packaging.version import Version
Expand All @@ -33,6 +34,13 @@
from .._settings import settings
from .compute.is_constant import is_constant # noqa: F401

if Version(anndata_version) >= Version("0.10.0"):
from anndata._core.sparse_dataset import (

Check warning on line 38 in scanpy/_utils/__init__.py

View check run for this annotation

Codecov / codecov/patch

scanpy/_utils/__init__.py#L38

Added line #L38 was not covered by tests
BaseCompressedSparseDataset as SparseDataset,
)
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved
else:
from anndata._core.sparse_dataset import SparseDataset

if TYPE_CHECKING:
from collections.abc import Mapping
from pathlib import Path
Expand Down Expand Up @@ -1084,3 +1092,14 @@
if axis in {1, "var"}:
return (1, "var")
raise ValueError(f"`axis` must be either 0, 1, 'obs', or 'var', was {axis!r}")


def is_backed_type(X: object) -> bool:
return isinstance(X, (SparseDataset, h5py.File, h5py.Dataset))


def raise_not_implemented_error_if_backed_type(X: object, method_name: str) -> None:
if is_backed_type(X):
raise NotImplementedError(
f"{method_name} is not implemented for matrices of type {type(X)}"
)
11 changes: 9 additions & 2 deletions scanpy/preprocessing/_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from .. import logging as logg
from .._compat import DaskArray, pkg_version
from .._settings import settings
from .._utils import _doc_params, _empty
from .._utils import _doc_params, _empty, is_backed_type
from ..get import _check_mask, _get_obs_rep
from ._docs import doc_mask_var_hvg
from ._utils import _get_mean_var
Expand Down Expand Up @@ -173,6 +173,10 @@ def pca(
)
data_is_AnnData = isinstance(data, AnnData)
if data_is_AnnData:
if layer is None and not chunked and is_backed_type(data.X):
raise NotImplementedError(
f"PCA is not implemented for matrices of type {type(data.X)} with chunked as False"
)
adata = data.copy() if copy else data
else:
if pkg_version("anndata") < Version("0.8.0rc1"):
Expand All @@ -195,7 +199,10 @@ def pca(
logg.info(f" with n_comps={n_comps}")

X = _get_obs_rep(adata_comp, layer=layer)

if is_backed_type(X) and layer is not None:
raise NotImplementedError(
f"PCA is not implemented for matrices of type {type(X)} from layers"
)
# See: https://github.com/scverse/scanpy/pull/2816#issuecomment-1932650529
if (
Version(ad.__version__) < Version("0.9")
Expand Down
2 changes: 2 additions & 0 deletions scanpy/preprocessing/_scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .._utils import (
_check_array_function_arguments,
axis_mul_or_truediv,
raise_not_implemented_error_if_backed_type,
renamed_arg,
view_to_actual,
)
Expand Down Expand Up @@ -298,6 +299,7 @@ def scale_anndata(
mask_obs = _check_mask(adata, mask_obs, "obs")
view_to_actual(adata)
X = _get_obs_rep(adata, layer=layer, obsm=obsm)
raise_not_implemented_error_if_backed_type(X, "scale")
X, adata.var[str_mean_std[0]], adata.var[str_mean_std[1]] = scale(
X,
zero_center=zero_center,
Expand Down
15 changes: 15 additions & 0 deletions scanpy/preprocessing/_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from .._utils import (
_check_array_function_arguments,
axis_sum,
is_backed_type,
raise_not_implemented_error_if_backed_type,
renamed_arg,
sanitize_anndata,
view_to_actual,
Expand Down Expand Up @@ -145,6 +147,7 @@ def filter_cells(
"`min_genes`, `max_counts`, `max_genes` per call."
)
if isinstance(data, AnnData):
raise_not_implemented_error_if_backed_type(data.X, "filter_cells")
adata = data.copy() if copy else data
cell_subset, number = materialize_as_ndarray(
filter_cells(
Expand Down Expand Up @@ -260,6 +263,7 @@ def filter_genes(
)

if isinstance(data, AnnData):
raise_not_implemented_error_if_backed_type(data.X, "filter_genes")
adata = data.copy() if copy else data
gene_subset, number = materialize_as_ndarray(
filter_genes(
Expand Down Expand Up @@ -405,10 +409,19 @@ def log1p_anndata(
raise NotImplementedError(
"Currently cannot perform chunked operations on arrays not stored in X."
)
if adata.isbacked and adata.file._filemode != "r+":
raise NotImplementedError(
"log1p is not implemented for backed AnnData with backed mode not r+"
)
for chunk, start, end in adata.chunked_X(chunk_size):
adata.X[start:end] = log1p(chunk, base=base, copy=False)
else:
X = _get_obs_rep(adata, layer=layer, obsm=obsm)
if is_backed_type(X):
msg = f"log1p is not implemented for matrices of type {type(X)}"
if layer is not None:
raise NotImplementedError(f"{msg} from layers")
raise NotImplementedError(f"{msg} without `chunked=True`")
X = log1p(X, copy=False, base=base)
_set_obs_rep(adata, X, layer=layer, obsm=obsm)

Expand Down Expand Up @@ -647,6 +660,7 @@ def regress_out(
keys = [keys]

X = _get_obs_rep(adata, layer=layer)
raise_not_implemented_error_if_backed_type(X, "regress_out")

if issparse(X):
logg.info(" sparse input is densified and may " "lead to high memory use")
Expand Down Expand Up @@ -855,6 +869,7 @@ def downsample_counts(
`adata.X` : :class:`numpy.ndarray` | :class:`scipy.sparse.spmatrix` (dtype `float`)
Downsampled counts matrix.
"""
raise_not_implemented_error_if_backed_type(adata.X, "downsample_counts")
# This logic is all dispatch
total_counts_call = total_counts is not None
counts_per_cell_call = counts_per_cell is not None
Expand Down
98 changes: 98 additions & 0 deletions scanpy/tests/test_backed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from __future__ import annotations

from functools import partial

import pytest
from anndata import read_h5ad

import scanpy as sc


@pytest.mark.parametrize(
("name", "func", "msg"),
[
pytest.param("PCA", sc.pp.pca, " with chunked as False", id="pca"),
pytest.param(
"PCA", partial(sc.pp.pca, layer="X_copy"), " from layers", id="pca_layer"
),
pytest.param(
"regress_out",
partial(sc.pp.regress_out, keys=["n_counts", "percent_mito"]),
"",
id="regress_out",
),
pytest.param(
"dendrogram", partial(sc.tl.dendrogram, groupby="cat"), "", id="dendrogram"
),
pytest.param("tsne", sc.tl.tsne, "", id="tsne"),
pytest.param("scale", sc.pp.scale, "", id="scale"),
pytest.param(
"downsample_counts",
partial(sc.pp.downsample_counts, counts_per_cell=1000),
"",
id="downsample_counts",
),
pytest.param(
"filter_genes",
partial(sc.pp.filter_genes, max_cells=1000),
"",
id="filter_genes",
),
pytest.param(
"filter_cells",
partial(sc.pp.filter_cells, max_genes=1000),
"",
id="filter_cells",
),
pytest.param(
"rank_genes_groups",
partial(sc.tl.rank_genes_groups, groupby="cat"),
"",
id="rank_genes_groups",
),
pytest.param(
"score_genes",
partial(sc.tl.score_genes, gene_list=map(str, range(100))),
"",
id="score_genes",
),
],
)
def test_backed_error(backed_adata, name, func, msg):
with pytest.raises(
NotImplementedError,
match=f"{name} is not implemented for matrices of type {type(backed_adata.X)}{msg}",
):
func(backed_adata)


def test_log1p_backed_errors(backed_adata):
with pytest.raises(
NotImplementedError,
match="log1p is not implemented for backed AnnData with backed mode not r+",
):
sc.pp.log1p(backed_adata, chunked=True)
backed_adata.file.close()
backed_adata = read_h5ad(backed_adata.filename, backed="r+")
with pytest.raises(
NotImplementedError,
match=f"log1p is not implemented for matrices of type {type(backed_adata.X)} without `chunked=True`",
):
sc.pp.log1p(backed_adata)
backed_adata.layers["X_copy"] = backed_adata.X
layer_type = type(backed_adata.layers["X_copy"])
with pytest.raises(
NotImplementedError,
match=f"log1p is not implemented for matrices of type {layer_type} from layers",
):
sc.pp.log1p(backed_adata, layer="X_copy")
backed_adata.file.close()


def test_scatter_backed(backed_adata):
sc.pp.pca(backed_adata, chunked=True)
sc.pl.scatter(backed_adata, color="0", basis="pca")


def test_dotplot_backed(backed_adata):
sc.pl.dotplot(backed_adata, ["0", "1", "2", "3"], groupby="cat")
17 changes: 17 additions & 0 deletions scanpy/tests/test_ingest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import anndata
import numpy as np
import pytest
from sklearn.neighbors import KDTree
Expand Down Expand Up @@ -153,3 +154,19 @@ def test_ingest_map_embedding_umap():
umap_transformed_t = reducer.transform(T)

assert np.allclose(ing._obsm["X_umap"], umap_transformed_t)


def test_ingest_backed(adatas, tmp_path):
adata_ref = adatas[0].copy()
adata_new = adatas[1].copy()

adata_new.write_h5ad(f"{tmp_path}/new.h5ad")

adata_new = anndata.read_h5ad(f"{tmp_path}/new.h5ad", backed="r")

ing = sc.tl.Ingest(adata_ref)
with pytest.raises(
NotImplementedError,
match=f"Ingest.fit is not implemented for matrices of type {type(adata_new.X)}",
):
ing.fit(adata_new)
2 changes: 1 addition & 1 deletion scanpy/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,6 @@ def test_is_constant_dask(axis, expected, block_type):
[0, 0, 1, 0],
[0, 0, 0, 0],
]
x = da.from_array(np.array(x_data), chunks=2).map_blocks(block_type)
x = da.from_array(np.array(x_data), chunks=(2, 2)).map_blocks(block_type)

np.testing.assert_array_equal(expected, is_constant(x, axis=axis))
4 changes: 3 additions & 1 deletion scanpy/tools/_dendrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from .. import logging as logg
from .._compat import old_positionals
from .._utils import _doc_params
from .._utils import _doc_params, raise_not_implemented_error_if_backed_type
from ..neighbors._doc import doc_n_pcs, doc_use_rep
from ._utils import _choose_representation

Expand Down Expand Up @@ -117,6 +117,8 @@ def dendrogram(
>>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ']
>>> sc.pl.dotplot(adata, markers, groupby='bulk_labels', dendrogram=True)
"""

raise_not_implemented_error_if_backed_type(adata.X, "dendrogram")
if isinstance(groupby, str):
# if not a list, turn into a list
groupby = [groupby]
Expand Down
3 changes: 2 additions & 1 deletion scanpy/tools/_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from .. import logging as logg
from .._compat import old_positionals, pkg_version
from .._settings import settings
from .._utils import NeighborsView
from .._utils import NeighborsView, raise_not_implemented_error_if_backed_type
from .._utils._doctests import doctest_skip
from ..neighbors import FlatTree

Expand Down Expand Up @@ -392,6 +392,7 @@ def fit(self, adata_new):
`adata` refers to the :class:`~anndata.AnnData` object
that is passed during the initialization of an Ingest instance.
"""
raise_not_implemented_error_if_backed_type(adata_new.X, "Ingest.fit")
ref_var_names = self._adata_ref.var_names.str.upper()
new_var_names = adata_new.var_names.str.upper()

Expand Down
7 changes: 5 additions & 2 deletions scanpy/tools/_rank_genes_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
from .. import _utils
from .. import logging as logg
from .._compat import old_positionals
from .._utils import check_nonnegative_integers
from .._utils import (
check_nonnegative_integers,
raise_not_implemented_error_if_backed_type,
)
from ..get import _check_mask
from ..preprocessing._utils import _get_mean_var

Expand Down Expand Up @@ -134,6 +137,7 @@ def __init__(
if use_raw and adata.raw is not None:
adata_comp = adata.raw
X = adata_comp.X
raise_not_implemented_error_if_backed_type(X, "rank_genes_groups")

# for correct getnnz calculation
if issparse(X):
Expand Down Expand Up @@ -594,7 +598,6 @@ def rank_genes_groups(
>>> # to visualize the results
>>> sc.pl.rank_genes_groups(adata)
"""

if mask_var is not None:
mask_var = _check_mask(adata, mask_var, "var")

Expand Down
6 changes: 5 additions & 1 deletion scanpy/tools/_score_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
from scipy.sparse import issparse

from scanpy._utils import _check_use_raw
from scanpy._utils import _check_use_raw, is_backed_type

from .. import logging as logg
from .._compat import old_positionals
Expand Down Expand Up @@ -110,6 +110,10 @@ def score_genes(
start = logg.info(f"computing score {score_name!r}")
adata = adata.copy() if copy else adata
use_raw = _check_use_raw(adata, use_raw)
if is_backed_type(adata.X) and not use_raw:
raise NotImplementedError(
f"score_genes is not implemented for matrices of type {type(adata.X)}"
)

if random_state is not None:
np.random.seed(random_state)
Expand Down
3 changes: 2 additions & 1 deletion scanpy/tools/_tsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .. import logging as logg
from .._compat import old_positionals
from .._settings import settings
from .._utils import _doc_params
from .._utils import _doc_params, raise_not_implemented_error_if_backed_type
from ..neighbors._doc import doc_n_pcs, doc_use_rep
from ._utils import _choose_representation

Expand Down Expand Up @@ -106,6 +106,7 @@ def tsne(
start = logg.info("computing tSNE")
adata = adata.copy() if copy else adata
X = _choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs)
raise_not_implemented_error_if_backed_type(X, "tsne")
# params for sklearn
n_jobs = settings.n_jobs if n_jobs is None else n_jobs
params_sklearn = dict(
Expand Down
Loading
Loading