Skip to content

Commit

Permalink
fixed to_df for sparse data
Browse files Browse the repository at this point in the history
  • Loading branch information
falexwolf committed Jan 12, 2019
1 parent 622503a commit 2d11f9f
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 8 deletions.
16 changes: 12 additions & 4 deletions anndata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1539,12 +1539,20 @@ def transpose(self):
T = property(transpose)

def to_df(self):
    """Generate shallow :class:`~pandas.DataFrame`.

    The data matrix `.X` is returned as
    :class:`~pandas.DataFrame`, where `.obs_names` initializes the
    index, and `.var_names` the columns.

    * No annotations are maintained in the returned object.
    * The data matrix is densified in case it is sparse.

    Returns
    -------
    A :class:`~pandas.DataFrame` with one row per observation and one
    column per variable.
    """
    # Densify sparse matrices before handing the data to pandas; dense
    # data is passed through unchanged.
    X = self._X.toarray() if issparse(self._X) else self._X
    return pd.DataFrame(X, index=self.obs_names, columns=self.var_names)

def copy(self, filename=None):
"""Full copy, optionally on disk."""
Expand Down
26 changes: 22 additions & 4 deletions anndata/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,20 @@
from numpy import ma
import pandas as pd
from scipy import sparse as sp
from scipy.sparse import csr_matrix

from anndata import AnnData


# Shared fixtures reused by the tests below: one AnnData backed by a dense
# array and one backed by a CSR sparse matrix with named observations and
# variables.
adata_dense = AnnData(np.array([[1, 2], [3, 4]]))

_sparse_obs = {'obs_names': ['s1', 's2'], 'anno1': ['c1', 'c2']}
_sparse_var = {'var_names': ['a', 'b', 'c']}
adata_sparse = AnnData(
    csr_matrix([[0, 2, 3], [0, 5, 6]]),
    _sparse_obs,
    _sparse_var,
)


def test_creation():
AnnData(np.array([[1, 2], [3, 4]]))
AnnData(np.array([[1, 2], [3, 4]]), {}, {})
Expand Down Expand Up @@ -314,10 +324,7 @@ def test_concatenate():

# sparse data
from scipy.sparse import csr_matrix
adata1 = AnnData(csr_matrix([[0, 2, 3], [0, 5, 6]]),
{'obs_names': ['s1', 's2'],
'anno1': ['c1', 'c2']},
{'var_names': ['a', 'b', 'c']})
adata1 = adata_sparse
adata2 = AnnData(csr_matrix([[0, 2, 3], [0, 5, 6]]),
{'obs_names': ['s3', 's4'],
'anno1': ['c3', 'c4']},
Expand Down Expand Up @@ -359,8 +366,19 @@ def test_rename_categories():
assert list(adata.obs['cat_anno'].cat.categories) == new_categories
assert list(adata.uns['tool']['cat_array'].dtype.names) == new_categories


def test_pickle():
    """Round-trip an empty AnnData through pickle.

    After unpickling, the `.obsm` of the restored object must refer back
    to the restored object itself via `_parent`.
    """
    from pickle import dumps, loads

    original = AnnData()
    serialized = dumps(original)
    restored = loads(serialized)
    assert restored.obsm._parent == restored


def test_to_df_dense():
    """`to_df` on dense data returns the values of `.X` with matching labels."""
    df = adata_dense.to_df()
    # Previously this test asserted nothing, so it could only catch
    # exceptions; pin the values and labels like the sparse test does.
    assert df.values.tolist() == adata_dense.X.tolist()
    assert list(df.index) == list(adata_dense.obs_names)
    assert list(df.columns) == list(adata_dense.var_names)


def test_to_df_sparse():
    """`to_df` on sparse data yields the same values as the densified `.X`."""
    expected_rows = adata_sparse.X.toarray().tolist()
    frame = adata_sparse.to_df()
    assert frame.values.tolist() == expected_rows

0 comments on commit 2d11f9f

Please sign in to comment.