From af0c1e105ec72c4024e0760501330fdb2b3ad5cb Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 14:29:37 +0200 Subject: [PATCH 01/18] add basic view tests --- anndata/tests/test_views.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/anndata/tests/test_views.py b/anndata/tests/test_views.py index 6770b2000..7945d17ad 100644 --- a/anndata/tests/test_views.py +++ b/anndata/tests/test_views.py @@ -659,3 +659,14 @@ def test_copy_X_dtype(): adata = ad.AnnData(sparse.eye(50, dtype=np.float64, format="csr")) adata_c = adata[::2].copy() assert adata_c.X.dtype == adata.X.dtype + + +def test_x_none(): + orig = ad.AnnData(obs=pd.DataFrame(index=np.arange(50))) + assert orig.shape == (50, 0) + view = orig[2:4] + assert view.shape == (2, 0) + assert view.obs_names.tolist() == ["2", "3"] + new = view.copy() + assert new.shape == (2, 0) + assert new.obs_names.tolist() == ["2", "3"] From 97655f78b73b2e5ccdf5d6385f043d57acfdd1f4 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 15:04:08 +0200 Subject: [PATCH 02/18] Allow passing dicts as obs/var --- anndata/_core/anndata.py | 49 ++++++++++++++++++++++------------------ anndata/_core/raw.py | 2 +- anndata/tests/test_x.py | 22 ++++++++++++++++-- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index b19381b0b..79f7ead6f 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -103,7 +103,9 @@ def _check_2d_shape(X): @singledispatch -def _gen_dataframe(anno, length, index_names): +def _gen_dataframe( + anno: Mapping[str, Any], index_names: Iterable[str], length: int | None = None +) -> pd.DataFrame: if anno is None or len(anno) == 0: anno = {} for index_name in index_names: @@ -113,15 +115,27 @@ def _gen_dataframe(anno, length, index_names): index=anno[index_name], columns=[k for k in anno.keys() if k != index_name], ) - return pd.DataFrame( + + def mk_index(l: int) -> pd.Index: + return pd.RangeIndex(0, l, name=None).astype(str) + + df = pd.DataFrame( anno, - index=pd.RangeIndex(0, length, name=None).astype(str), + index=None if length is None else mk_index(length), columns=None if len(anno) else [], ) + if length is None: + df.index = mk_index(len(df)) + return df @_gen_dataframe.register(pd.DataFrame) -def _(anno, length, index_names): +def _gen_dataframe_df( + anno: pd.DataFrame, index_names: Iterable[str], length: int | None = None +): + if length is not None and length != len(anno): + msg = f"`shape` is inconsistent with `{index_names[0].split('_')[0]}`" + raise ValueError(msg) anno = anno.copy(deep=False) if not is_string_dtype(anno.index): warnings.warn("Transforming to str index.", ImplicitModificationWarning) @@ -133,7 +147,9 @@ def _(anno, length, index_names): @_gen_dataframe.register(pd.Series) @_gen_dataframe.register(pd.Index) -def _(anno, length, index_names): +def _gen_dataframe_1d( + anno: pd.Series | pd.Index, index_names: Iterable[str], length: int | None = None +): raise ValueError(f"Cannot convert {type(anno)} to DataFrame") @@ -473,27 +489,16 @@ def _init_as_actual( X = np.array(X, dtype, copy=False) # data matrix and shape self._X = X - self._n_obs, self._n_vars = self._X.shape + n_obs, n_vars = self._X.shape else: self._X = None - self._n_obs = len([] if obs is None else obs) - self._n_vars = len([] if var is None else var) - # check consistency with shape - if shape is not None: - if self._n_obs == 0: - self._n_obs = shape[0] - else: - if self._n_obs != shape[0]: - raise ValueError("`shape` is inconsistent with `obs`") - if self._n_vars == 0: - self._n_vars = shape[1] - else: - if self._n_vars != shape[1]: - raise ValueError("`shape` is inconsistent with `var`") + n_obs, n_vars = (None, None) if shape is None else shape # annotations - self._obs = _gen_dataframe(obs, self._n_obs, ["obs_names", "row_names"]) - self._var = _gen_dataframe(var, self._n_vars, ["var_names", "col_names"]) + self._obs = _gen_dataframe(obs, ["obs_names", "row_names"], n_obs) + self._var = _gen_dataframe(var, ["var_names", "col_names"], n_vars) + self._n_obs = len(self.obs) + self._n_vars = len(self.var) # now we can verify if indices match! for attr_name, x_name, idx in x_indices: diff --git a/anndata/_core/raw.py b/anndata/_core/raw.py index f248b99be..c2f916dc1 100644 --- a/anndata/_core/raw.py +++ b/anndata/_core/raw.py @@ -34,7 +34,7 @@ def __init__( self._X = X.get() else: self._X = X - self._var = _gen_dataframe(var, self.X.shape[1], ["var_names"]) + self._var = _gen_dataframe(var, ["var_names"], self.X.shape[1]) self._varm = AxisArrays(self, 1, varm) elif X is None: # construct from adata # Move from GPU to CPU since it's large and not always used diff --git a/anndata/tests/test_x.py b/anndata/tests/test_x.py index fb333504c..524950b56 100644 --- a/anndata/tests/test_x.py +++ b/anndata/tests/test_x.py @@ -65,8 +65,26 @@ def test_del_set_equiv_X(): assert orig.X is None -def test_init_X_as_none(): - # test initialiser +@pytest.mark.parametrize( + ("obs", "var", "shape"), + [ + pytest.param(dict(obs_names=["1", "2"]), None, (2, 0), id="obs"), + pytest.param(None, dict(var_names=["a", "b"]), (0, 2), id="var"), + pytest.param( + dict(obs_names=["1", "2", "3"]), + dict(var_names=["a", "b"]), + (3, 2), + id="both", + ), + ], +) +def test_init_x_as_none_shape_from_obs_var(obs, var, shape): + adata = AnnData(None, obs, var) + assert adata.X is None + assert adata.shape == shape + + +def test_init_x_as_none_explicit_shape(): shape = (3, 5) adata = AnnData(None, uns=dict(test=np.array((3, 3))), shape=shape) assert adata.X is None From c2a193ebfc8ccac2403a868a4bd4dae7972e0e41 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 15:24:00 +0200 Subject: [PATCH 03/18] simplify --- anndata/_core/anndata.py | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index 79f7ead6f..ca41d55d8 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -126,6 +126,13 @@ def mk_index(l: int) -> pd.Index: ) if length is None: df.index = mk_index(len(df)) + elif length != len(df): + attr = index_names[0].split("_")[0] + what = "row" if attr == "obs" else "column" + raise ValueError( + f"Observations annot. `{attr}` must have number of {what}s of `X`" + f" ({length}), but has {len(df)} {what}s." + ) return df @@ -372,8 +379,6 @@ def _init_as_view(self, adata_ref: "AnnData", oidx: Index, vidx: Index): self._obs = DataFrameView(obs_sub, view_args=(self, "obs")) self._var = DataFrameView(var_sub, view_args=(self, "var")) self._uns = uns - self._n_obs = len(self.obs) - self._n_vars = len(self.var) # set data if self.isbacked: @@ -497,8 +502,6 @@ def _init_as_actual( # annotations self._obs = _gen_dataframe(obs, ["obs_names", "row_names"], n_obs) self._var = _gen_dataframe(var, ["var_names", "col_names"], n_vars) - self._n_obs = len(self.obs) - self._n_vars = len(self.var) # now we can verify if indices match! for attr_name, x_name, idx in x_indices: @@ -788,12 +791,12 @@ def raw(self): @property def n_obs(self) -> int: """Number of observations.""" - return self._n_obs + return len(self.obs) @property def n_vars(self) -> int: """Number of variables/features.""" - return self._n_vars + return len(self.var) def _set_dim_df(self, value: pd.DataFrame, attr: str): if not isinstance(value, pd.DataFrame): @@ -1860,38 +1863,28 @@ def __contains__(self, key: Any): def _check_dimensions(self, key=None): if key is None: - key = {"obs", "var", "obsm", "varm"} + key = {"obsm", "varm"} else: key = {key} - if "obs" in key and len(self._obs) != self._n_obs: - raise ValueError( - "Observations annot. `obs` must have number of rows of `X`" - f" ({self._n_obs}), but has {self._obs.shape[0]} rows." - ) - if "var" in key and len(self._var) != self._n_vars: - raise ValueError( - "Variables annot. `var` must have number of columns of `X`" - f" ({self._n_vars}), but has {self._var.shape[0]} rows." - ) if "obsm" in key: obsm = self._obsm if ( - not all([dim_len(o, 0) == self._n_obs for o in obsm.values()]) - and len(obsm.dim_names) != self._n_obs + not all([dim_len(o, 0) == self.n_obs for o in obsm.values()]) + and len(obsm.dim_names) != self.n_obs ): raise ValueError( "Observations annot. `obsm` must have number of rows of `X`" - f" ({self._n_obs}), but has {len(obsm)} rows." + f" ({self.n_obs}), but has {len(obsm)} rows." ) if "varm" in key: varm = self._varm if ( - not all([dim_len(v, 0) == self._n_vars for v in varm.values()]) - and len(varm.dim_names) != self._n_vars + not all([dim_len(v, 0) == self.n_vars for v in varm.values()]) + and len(varm.dim_names) != self.n_vars ): raise ValueError( "Variables annot. `varm` must have number of columns of `X`" - f" ({self._n_vars}), but has {len(varm)} rows." + f" ({self.n_vars}), but has {len(varm)} rows." ) def write_h5ad( From ae4e1c3332e6da2e6a618eb43311eff36850028f Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 16:04:36 +0200 Subject: [PATCH 04/18] add tests for errors --- anndata/tests/test_base.py | 39 +++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index d045bc5ca..e224dde04 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -1,4 +1,5 @@ from itertools import product +import re import warnings import numpy as np @@ -39,9 +40,6 @@ def test_creation(): assert adata.raw.X.tolist() == X.tolist() assert adata.raw.var_names.tolist() == ["a", "b", "c"] - with pytest.raises(ValueError): - AnnData(np.array([[1, 2], [3, 4]]), dict(TooLong=[1, 2, 3, 4])) - # init with empty data matrix shape = (3, 5) adata = AnnData(None, uns=dict(test=np.array((3, 3))), shape=shape) @@ -50,6 +48,41 @@ def test_creation(): assert "test" in adata.uns +@pytest.mark.parametrize( + "src_kw", + [ + pytest.param(dict(X=adata_dense.X), id="x"), + pytest.param(dict(shape=(2, 2)), id="shape"), + ], +) +@pytest.mark.parametrize("dim", ["obs", "var"]) +@pytest.mark.parametrize( + ("dim_arg", "msg_template"), + [ + pytest.param( + dict(TooLong=[1, 2, 3, 4]), + "Length of values (4) does not match length of index (2)", + id="too_long_col", + ), + pytest.param( + dict(obs_names=["a", "b", "c"]), + "`{dim}` must have number of {mat_dim}s of `X`", + id="too_many_names", + ), + pytest.param( + pd.DataFrame(index=["a", "b", "c"]), + "`{dim}` must have number of {mat_dim}s of `X`", + id="too_long_df", + ), + ], +) +def test_creation_error(src_kw, dim, dim_arg, msg_template: str): + mat_dim = "row" if dim == "obs" else "column" + msg = msg_template.format(dim=dim, mat_dim=mat_dim) + with pytest.raises(ValueError, match=re.escape(msg)): + AnnData(**src_kw, **{dim: dim_arg}) + + def test_create_with_dfs(): X = np.ones((6, 3)) obs = pd.DataFrame(dict(cat_anno=pd.Categorical(["a", "a", "a", "a", "b", "a"]))) From 55f2eca406b812d6920df2a85e933a8638b93a85 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 16:30:48 +0200 Subject: [PATCH 05/18] fix test --- anndata/tests/test_base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index e224dde04..14f963a13 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -60,17 +60,17 @@ def test_creation(): ("dim_arg", "msg_template"), [ pytest.param( - dict(TooLong=[1, 2, 3, 4]), + lambda _: dict(TooLong=[1, 2, 3, 4]), "Length of values (4) does not match length of index (2)", id="too_long_col", ), pytest.param( - dict(obs_names=["a", "b", "c"]), + lambda dim: {f"{dim}_names": ["a", "b", "c"]}, "`{dim}` must have number of {mat_dim}s of `X`", id="too_many_names", ), pytest.param( - pd.DataFrame(index=["a", "b", "c"]), + lambda _: pd.DataFrame(index=["a", "b", "c"]), "`{dim}` must have number of {mat_dim}s of `X`", id="too_long_df", ), @@ -80,7 +80,7 @@ def test_creation_error(src_kw, dim, dim_arg, msg_template: str): mat_dim = "row" if dim == "obs" else "column" msg = msg_template.format(dim=dim, mat_dim=mat_dim) with pytest.raises(ValueError, match=re.escape(msg)): - AnnData(**src_kw, **{dim: dim_arg}) + AnnData(**src_kw, **{dim: dim_arg(dim)}) def test_create_with_dfs(): From b0b6078d8ad5cd13b4c55ff5289f6c4647c0ec0e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 16:38:49 +0200 Subject: [PATCH 06/18] really fix tests --- anndata/tests/test_base.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index 14f963a13..c48288605 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -49,15 +49,15 @@ def test_creation(): @pytest.mark.parametrize( - "src_kw", + ("src", "src_arg"), [ - pytest.param(dict(X=adata_dense.X), id="x"), - pytest.param(dict(shape=(2, 2)), id="shape"), + pytest.param("X", adata_dense.X, id="x"), + pytest.param("shape", (2, 2), id="shape"), ], ) @pytest.mark.parametrize("dim", ["obs", "var"]) @pytest.mark.parametrize( - ("dim_arg", "msg_template"), + ("dim_arg", "msg"), [ pytest.param( lambda _: dict(TooLong=[1, 2, 3, 4]), @@ -65,22 +65,22 @@ def test_creation(): id="too_long_col", ), pytest.param( - lambda dim: {f"{dim}_names": ["a", "b", "c"]}, - "`{dim}` must have number of {mat_dim}s of `X`", - id="too_many_names", + lambda dim: {f"{dim}_names": ["a", "b", "c"]}, None, id="too_many_names" ), pytest.param( - lambda _: pd.DataFrame(index=["a", "b", "c"]), - "`{dim}` must have number of {mat_dim}s of `X`", - id="too_long_df", + lambda _: pd.DataFrame(index=["a", "b", "c"]), None, id="too_long_df" ), ], ) -def test_creation_error(src_kw, dim, dim_arg, msg_template: str): +def test_creation_error(src, src_arg, dim, dim_arg, msg: str | None): mat_dim = "row" if dim == "obs" else "column" - msg = msg_template.format(dim=dim, mat_dim=mat_dim) + if msg is None: + msg = dict( + X=f"`{dim}` must have number of {mat_dim}s of `X`", + shape=f"`shape` is inconsistent with `{dim}`", + )[src] with pytest.raises(ValueError, match=re.escape(msg)): - AnnData(**src_kw, **{dim: dim_arg(dim)}) + AnnData(**{src: src_arg, dim: dim_arg(dim)}) def test_create_with_dfs(): From 9f45ce006f2a09c2cf3d0f5198f41e3349fde44e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 16:41:01 +0200 Subject: [PATCH 07/18] clearer tests --- anndata/tests/test_base.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index c48288605..1b41dbffb 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -49,10 +49,14 @@ def test_creation(): @pytest.mark.parametrize( - ("src", "src_arg"), + ("src", "src_arg", "dim_msg"), [ - pytest.param("X", adata_dense.X, id="x"), - pytest.param("shape", (2, 2), id="shape"), + pytest.param( + "X", adata_dense.X, "`{dim}` must have number of {mat_dim}s of `X`", id="x" + ), + pytest.param( + "shape", (2, 2), "`shape` is inconsistent with `{dim}`", id="shape" + ), ], ) @pytest.mark.parametrize("dim", ["obs", "var"]) @@ -72,13 +76,10 @@ def test_creation(): ), ], ) -def test_creation_error(src, src_arg, dim, dim_arg, msg: str | None): +def test_creation_error(src, src_arg, dim_msg, dim, dim_arg, msg: str | None): mat_dim = "row" if dim == "obs" else "column" if msg is None: - msg = dict( - X=f"`{dim}` must have number of {mat_dim}s of `X`", - shape=f"`shape` is inconsistent with `{dim}`", - )[src] + msg = dim_msg.format(dim=dim, mat_dim=mat_dim) with pytest.raises(ValueError, match=re.escape(msg)): AnnData(**{src: src_arg, dim: dim_arg(dim)}) From 2e20a06d70754f5840203f729c0ab71145e57f34 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 17:00:28 +0200 Subject: [PATCH 08/18] add source --- anndata/_core/anndata.py | 63 +++++++++++++++++++++++++++++--------- anndata/_core/raw.py | 4 ++- anndata/tests/test_base.py | 2 +- 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index ca41d55d8..cfb21df30 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -102,9 +102,34 @@ def _check_2d_shape(X): ) +def _mk_df_error( + source: Literal["X", "shape"], + attr: Literal["obs", "var"], + expected: int, + actual: int, +): + if source == "X": + what = "row" if attr == "obs" else "column" + msg = ( + f"Observations annot. `{attr}` must have number of {what}s of `X`" + f" ({expected}), but has {actual} {what}s." + ) + else: + msg = ( + f"`shape` is inconsistent with `{attr}` " + "({actual} {what}s instead of {expected})" + ) + return ValueError(msg) + + @singledispatch def _gen_dataframe( - anno: Mapping[str, Any], index_names: Iterable[str], length: int | None = None + anno: Mapping[str, Any], + index_names: Iterable[str], + *, + source: Literal["X", "shape"], + attr: Literal["obs", "var"], + length: int | None = None, ) -> pd.DataFrame: if anno is None or len(anno) == 0: anno = {} @@ -127,22 +152,21 @@ def mk_index(l: int) -> pd.Index: if length is None: df.index = mk_index(len(df)) elif length != len(df): - attr = index_names[0].split("_")[0] - what = "row" if attr == "obs" else "column" - raise ValueError( - f"Observations annot. `{attr}` must have number of {what}s of `X`" - f" ({length}), but has {len(df)} {what}s." - ) + raise _mk_df_error(source, attr, length, len(df)) return df @_gen_dataframe.register(pd.DataFrame) def _gen_dataframe_df( - anno: pd.DataFrame, index_names: Iterable[str], length: int | None = None + anno: pd.DataFrame, + index_names: Iterable[str], + *, + source: Literal["X", "shape"], + attr: Literal["obs", "var"], + length: int | None = None, ): if length is not None and length != len(anno): - msg = f"`shape` is inconsistent with `{index_names[0].split('_')[0]}`" - raise ValueError(msg) + raise _mk_df_error(source, attr, length, len(anno)) anno = anno.copy(deep=False) if not is_string_dtype(anno.index): warnings.warn("Transforming to str index.", ImplicitModificationWarning) @@ -155,9 +179,14 @@ def _gen_dataframe_df( @_gen_dataframe.register(pd.Series) @_gen_dataframe.register(pd.Index) def _gen_dataframe_1d( - anno: pd.Series | pd.Index, index_names: Iterable[str], length: int | None = None + anno: pd.Series | pd.Index, + index_names: Iterable[str], + *, + source: Literal["X", "shape"], + attr: Literal["obs", "var"], + length: int | None = None, ): - raise ValueError(f"Cannot convert {type(anno)} to DataFrame") + raise ValueError(f"Cannot convert {type(anno)} to {attr} DataFrame") class AnnData(metaclass=utils.DeprecationMixinMeta): @@ -495,13 +524,19 @@ def _init_as_actual( # data matrix and shape self._X = X n_obs, n_vars = self._X.shape + source = "X" else: self._X = None n_obs, n_vars = (None, None) if shape is None else shape + source = "shape" # annotations - self._obs = _gen_dataframe(obs, ["obs_names", "row_names"], n_obs) - self._var = _gen_dataframe(var, ["var_names", "col_names"], n_vars) + self._obs = _gen_dataframe( + obs, ["obs_names", "row_names"], source=source, attr="obs", length=n_obs + ) + self._var = _gen_dataframe( + var, ["var_names", "col_names"], source=source, attr="var", length=n_vars + ) # now we can verify if indices match! for attr_name, x_name, idx in x_indices: diff --git a/anndata/_core/raw.py b/anndata/_core/raw.py index c2f916dc1..0937ad788 100644 --- a/anndata/_core/raw.py +++ b/anndata/_core/raw.py @@ -34,7 +34,9 @@ def __init__( self._X = X.get() else: self._X = X - self._var = _gen_dataframe(var, ["var_names"], self.X.shape[1]) + self._var = _gen_dataframe( + var, ["var_names"], source="X", attr="var", length=self.X.shape[1] + ) self._varm = AxisArrays(self, 1, varm) elif X is None: # construct from adata # Move from GPU to CPU since it's large and not always used diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index 1b41dbffb..9d19d366a 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -77,8 +77,8 @@ def test_creation(): ], ) def test_creation_error(src, src_arg, dim_msg, dim, dim_arg, msg: str | None): - mat_dim = "row" if dim == "obs" else "column" if msg is None: + mat_dim = "row" if dim == "obs" else "column" msg = dim_msg.format(dim=dim, mat_dim=mat_dim) with pytest.raises(ValueError, match=re.escape(msg)): AnnData(**{src: src_arg, dim: dim_arg(dim)}) From 54f27c59e8eef47650a7737f41e149f4ee138c8e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 17:05:34 +0200 Subject: [PATCH 09/18] fix remaining tests --- anndata/_core/anndata.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index cfb21df30..df6a5abfd 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -133,22 +133,26 @@ def _gen_dataframe( ) -> pd.DataFrame: if anno is None or len(anno) == 0: anno = {} - for index_name in index_names: - if index_name in anno: - return pd.DataFrame( - anno, - index=anno[index_name], - columns=[k for k in anno.keys() if k != index_name], - ) def mk_index(l: int) -> pd.Index: return pd.RangeIndex(0, l, name=None).astype(str) - df = pd.DataFrame( - anno, - index=None if length is None else mk_index(length), - columns=None if len(anno) else [], - ) + for index_name in index_names: + if index_name not in anno: + continue + df = pd.DataFrame( + anno, + index=anno[index_name], + columns=[k for k in anno.keys() if k != index_name], + ) + break + else: + df = pd.DataFrame( + anno, + index=None if length is None else mk_index(length), + columns=None if len(anno) else [], + ) + if length is None: df.index = mk_index(len(df)) elif length != len(df): From 4fb539dee44d8d49494617646fef7d8a912ae78e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 17:12:04 +0200 Subject: [PATCH 10/18] fix 3.8 compat --- anndata/tests/test_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index 9d19d366a..005a8d479 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from itertools import product import re import warnings From 2115eea94fd888005afbdaaa394ab4a51ff45cbd Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 17:18:17 +0200 Subject: [PATCH 11/18] fix msg --- anndata/_core/anndata.py | 4 ++-- anndata/tests/test_base.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index df6a5abfd..a9f575038 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -111,8 +111,8 @@ def _mk_df_error( if source == "X": what = "row" if attr == "obs" else "column" msg = ( - f"Observations annot. `{attr}` must have number of {what}s of `X`" - f" ({expected}), but has {actual} {what}s." + f"Observations annot. `{attr}` must have as many rows as `X` has {what}s " + f"({expected}), but has {actual} rows." ) else: msg = ( diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index 005a8d479..17d6a9476 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -54,7 +54,10 @@ def test_creation(): ("src", "src_arg", "dim_msg"), [ pytest.param( - "X", adata_dense.X, "`{dim}` must have number of {mat_dim}s of `X`", id="x" + "X", + adata_dense.X, + "`{dim}` must have as many rows as `X` has {mat_dim}s", + id="x", ), pytest.param( "shape", (2, 2), "`shape` is inconsistent with `{dim}`", id="shape" From e061347f3d6b52b6048a4048a9040ccb39ef7848 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 17:20:22 +0200 Subject: [PATCH 12/18] annots --- anndata/tests/test_raw.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/anndata/tests/test_raw.py b/anndata/tests/test_raw.py index c376a54b4..5686a4edc 100644 --- a/anndata/tests/test_raw.py +++ b/anndata/tests/test_raw.py @@ -33,7 +33,7 @@ @pytest.fixture -def adata_raw(): +def adata_raw() -> ad.AnnData: adata = ad.AnnData( np.array(data, dtype="int32"), obs=obs_dict, var=var_dict, uns=uns_dict ) @@ -48,18 +48,18 @@ def adata_raw(): # ------------------------------------------------------------------------------- -def test_raw_init(adata_raw): +def test_raw_init(adata_raw: ad.AnnData): assert adata_raw.var_names.tolist() == ["var1", "var2"] assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"] assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]] -def test_raw_del(adata_raw): +def test_raw_del(adata_raw: ad.AnnData): del adata_raw.raw assert adata_raw.raw is None -def test_raw_set_as_none(adata_raw): +def test_raw_set_as_none(adata_raw: ad.AnnData): # Test for scverse/anndata#445 a = adata_raw b = adata_raw.copy() @@ -70,7 +70,7 @@ def test_raw_set_as_none(adata_raw): assert_equal(a, b) -def test_raw_of_view(adata_raw): +def test_raw_of_view(adata_raw: ad.AnnData): adata_view = adata_raw[adata_raw.obs["oanno1"] == "cat2"] assert adata_view.raw.X.tolist() == [ [4, 5, 6], @@ -78,7 +78,7 @@ def test_raw_of_view(adata_raw): ] -def test_raw_rw(adata_raw, backing_h5ad): +def test_raw_rw(adata_raw: ad.AnnData, backing_h5ad): adata_raw.write(backing_h5ad) adata_read = ad.read(backing_h5ad) @@ -89,7 +89,7 @@ def test_raw_rw(adata_raw, backing_h5ad): assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]] -def test_raw_view_rw(adata_raw, backing_h5ad): +def test_raw_view_rw(adata_raw: ad.AnnData, backing_h5ad): # Make sure it still writes correctly if the object is a view adata_raw_view = adata_raw[:, adata_raw.var_names] assert_equal(adata_raw_view, adata_raw) @@ -104,7 +104,7 @@ def test_raw_view_rw(adata_raw, backing_h5ad): assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]] -def test_raw_backed(adata_raw, backing_h5ad): +def test_raw_backed(adata_raw: ad.AnnData, backing_h5ad): adata_raw.filename = backing_h5ad assert adata_raw.var_names.tolist() == ["var1", "var2"] @@ -114,7 +114,7 @@ def test_raw_backed(adata_raw, backing_h5ad): assert adata_raw.raw[:, 0].X[:].tolist() == [[1], [4], [7]] -def test_raw_view_backed(adata_raw, backing_h5ad): +def test_raw_view_backed(adata_raw: ad.AnnData, backing_h5ad): adata_raw.filename = backing_h5ad assert adata_raw.var_names.tolist() == ["var1", "var2"] From 2cb6f507243944891a017e728ca8c9ed59e3eb7b Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 5 Sep 2023 17:36:19 +0200 Subject: [PATCH 13/18] fix raw --- anndata/_core/raw.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/anndata/_core/raw.py b/anndata/_core/raw.py index 0937ad788..058c9055c 100644 --- a/anndata/_core/raw.py +++ b/anndata/_core/raw.py @@ -34,8 +34,9 @@ def __init__( self._X = X.get() else: self._X = X + n_var = None if self._X is None else self._X.shape[1] self._var = _gen_dataframe( - var, ["var_names"], source="X", attr="var", length=self.X.shape[1] + var, ["var_names"], source="X", attr="var", length=n_var ) self._varm = AxisArrays(self, 1, varm) elif X is None: # construct from adata From a881d756c4a93cdbd0b3eb431f9f5a156f8466be Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 7 Sep 2023 15:23:11 +0200 Subject: [PATCH 14/18] clearer tests --- anndata/tests/test_x.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/anndata/tests/test_x.py b/anndata/tests/test_x.py index 524950b56..9ec8800e6 100644 --- a/anndata/tests/test_x.py +++ b/anndata/tests/test_x.py @@ -66,7 +66,7 @@ def test_del_set_equiv_X(): @pytest.mark.parametrize( - ("obs", "var", "shape"), + ("obs", "var", "shape_expected"), [ pytest.param(dict(obs_names=["1", "2"]), None, (2, 0), id="obs"), pytest.param(None, dict(var_names=["a", "b"]), (0, 2), id="var"), @@ -78,10 +78,10 @@ def test_del_set_equiv_X(): ), ], ) -def test_init_x_as_none_shape_from_obs_var(obs, var, shape): +def test_init_x_as_none_shape_from_obs_var(obs, var, shape_expected): adata = AnnData(None, obs, var) assert adata.X is None - assert adata.shape == shape + assert adata.shape == shape_expected def test_init_x_as_none_explicit_shape(): From f3eed730450596b4104ee9c2c452b4486c4d9dbc Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 7 Sep 2023 16:27:56 +0200 Subject: [PATCH 15/18] Release note --- docs/release-notes/0.10.0.md | 2 ++ docs/release-notes/0.10.1.md | 4 ++++ docs/release-notes/0.9.3.md | 4 ---- 3 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 docs/release-notes/0.10.1.md delete mode 100644 docs/release-notes/0.9.3.md diff --git a/docs/release-notes/0.10.0.md b/docs/release-notes/0.10.0.md index 933994191..025110aaa 100644 --- a/docs/release-notes/0.10.0.md +++ b/docs/release-notes/0.10.0.md @@ -31,3 +31,5 @@ ```{rubric} Bug fixes ``` + +* Fix shape inference on initialization when `X=None` is specified {pr}`1121` {user}`flying-sheep` diff --git a/docs/release-notes/0.10.1.md b/docs/release-notes/0.10.1.md new file mode 100644 index 000000000..8404a51c2 --- /dev/null +++ b/docs/release-notes/0.10.1.md @@ -0,0 +1,4 @@ +### 0.10.1 {small}`the future` + +```{rubric} Bugfix +``` diff --git a/docs/release-notes/0.9.3.md b/docs/release-notes/0.9.3.md deleted file mode 100644 index 5889feb8d..000000000 --- a/docs/release-notes/0.9.3.md +++ /dev/null @@ -1,4 +0,0 @@ -### 0.9.3 {small}`the future` - -```{rubric} Bugfix -``` From 89eb8902a344f29748320f868a0d46e5d13a051c Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 7 Sep 2023 16:39:03 +0200 Subject: [PATCH 16/18] fix release note index --- docs/release-notes/release-latest.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/release-notes/release-latest.md b/docs/release-notes/release-latest.md index 27cf8d866..a2a4d51c6 100644 --- a/docs/release-notes/release-latest.md +++ b/docs/release-notes/release-latest.md @@ -1,13 +1,13 @@ ## Version 0.10 +```{include} /release-notes/0.10.1.md +``` + ```{include} /release-notes/0.10.0.md ``` ## Version 0.9 -```{include} /release-notes/0.9.3.md -``` - ```{include} /release-notes/0.9.2.md ``` From a857f6b3a3cbc50cb337be62a8ddbbb586bf5906 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 7 Sep 2023 17:19:23 +0200 Subject: [PATCH 17/18] no 0.10.1 yet --- docs/release-notes/0.10.1.md | 4 ---- docs/release-notes/release-latest.md | 3 --- 2 files changed, 7 deletions(-) delete mode 100644 docs/release-notes/0.10.1.md diff --git a/docs/release-notes/0.10.1.md b/docs/release-notes/0.10.1.md deleted file mode 100644 index 8404a51c2..000000000 --- a/docs/release-notes/0.10.1.md +++ /dev/null @@ -1,4 +0,0 @@ -### 0.10.1 {small}`the future` - -```{rubric} Bugfix -``` diff --git a/docs/release-notes/release-latest.md b/docs/release-notes/release-latest.md index a2a4d51c6..83902ff47 100644 --- a/docs/release-notes/release-latest.md +++ b/docs/release-notes/release-latest.md @@ -1,8 +1,5 @@ ## Version 0.10 -```{include} /release-notes/0.10.1.md -``` - ```{include} /release-notes/0.10.0.md ``` From e1c19c18aae729df2386206aeec454e5f99d88a6 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 8 Sep 2023 10:41:21 +0200 Subject: [PATCH 18/18] suggestions --- anndata/_core/anndata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index a9f575038..5fc1fae89 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -527,7 +527,7 @@ def _init_as_actual( X = np.array(X, dtype, copy=False) # data matrix and shape self._X = X - n_obs, n_vars = self._X.shape + n_obs, n_vars = X.shape source = "X" else: self._X = None @@ -830,12 +830,12 @@ def raw(self): @property def n_obs(self) -> int: """Number of observations.""" - return len(self.obs) + return len(self.obs_names) @property def n_vars(self) -> int: """Number of variables/features.""" - return len(self.var) + return len(self.var_names) def _set_dim_df(self, value: pd.DataFrame, attr: str): if not isinstance(value, pd.DataFrame):