Update densifying sparse array #1263

Merged (1 commit, merged Jul 11, 2024)
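Summary: this PR replaces every use of the sparse-matrix `.A` shorthand with an explicit `.toarray()` call throughout scvelo. `.A` is a np.matrix-style property alias on SciPy's legacy sparse matrix classes; the newer sparse-array API deprecates it, so the explicit method keeps densification working across both. A minimal sketch of the equivalence the rewrite relies on (only NumPy and SciPy assumed):

```python
import numpy as np
from scipy.sparse import csr_matrix

X = csr_matrix(np.eye(3, dtype=np.float32))

# On legacy sparse matrices, `.A` is just a property alias for `.toarray()`;
# the explicit call is the spelling also supported by the sparse-array API.
dense_via_alias = X.A
dense_via_method = X.toarray()

assert np.array_equal(dense_via_alias, dense_via_method)
```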
scvelo/core/_anndata.py (4 changes: 2 additions & 2 deletions)

@@ -284,7 +284,7 @@ def get_df(
         df = data

     if issparse(df):
-        df = np.array(df.A)
+        df = np.array(df.toarray())
     if columns is None and hasattr(df, "names"):
         columns = df.names

@@ -426,7 +426,7 @@ def make_dense(
     for modality in modalities:
         count_data = get_modality(adata=adata, modality=modality)
         if issparse(count_data):
-            set_modality(adata=adata, modality=modality, new_value=count_data.A)
+            set_modality(adata=adata, modality=modality, new_value=count_data.toarray())

     return adata if not inplace else None
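For context, both touched helpers use the same guard-then-densify idiom. A self-contained sketch of that idiom on an AnnData object (toy data; `get_modality`/`set_modality` in the diff are scvelo internals):

```python
import numpy as np
from anndata import AnnData
from scipy.sparse import csr_matrix, issparse

adata = AnnData(csr_matrix(np.eye(4, dtype=np.float32)))

# Densify only when the backing store is sparse; dense input passes through.
if issparse(adata.X):
    adata.X = adata.X.toarray()

assert isinstance(adata.X, np.ndarray)
```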
scvelo/core/_linear_models.py (5 changes: 4 additions & 1 deletion)

@@ -73,7 +73,10 @@ def _trim_data(self, data: List) -> List:
             data = [data]

         data = np.array(
-            [data_mat.A if issparse(data_mat) else data_mat for data_mat in data]
+            [
+                data_mat.toarray() if issparse(data_mat) else data_mat
+                for data_mat in data
+            ]
         )

         # TODO: Add explanatory comment
scvelo/inference/_metabolic_labeling.py (2 changes: 1 addition & 1 deletion)

@@ -121,7 +121,7 @@ def _get_n_neighbors(
     rep_X = rep_X[rows, cols]

     if sparse_op:
-        n_neighbors_to_use = np.cumsum(rep_X.A > 0, axis=1)
+        n_neighbors_to_use = np.cumsum(rep_X.toarray() > 0, axis=1)
     else:
         n_neighbors_to_use = np.cumsum(rep_X > 0, axis=1)
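A toy illustration of the row-wise cumulative nonzero count this hunk densifies for (data made up):

```python
import numpy as np
from scipy.sparse import csr_matrix

rep_X = csr_matrix(np.array([[0.0, 2.0, 0.0, 1.0],
                             [3.0, 0.0, 0.0, 0.0]]))

# Cumulative count of nonzero entries along each row; going through a
# dense array keeps the elementwise comparison a plain ndarray op.
n_neighbors_to_use = np.cumsum(rep_X.toarray() > 0, axis=1)
print(n_neighbors_to_use)
# [[0 1 1 2]
#  [1 1 1 1]]
```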
scvelo/plotting/heatmap.py (2 changes: 1 addition & 1 deletion)

@@ -97,7 +97,7 @@ def heatmap(
         else adata[:, var_names].X
     )
     if issparse(X):
-        X = X.A
+        X = X.toarray()
     df = pd.DataFrame(X[np.argsort(time)], columns=var_names)

     if n_convolve is not None:
scvelo/plotting/utils.py (4 changes: 2 additions & 2 deletions)

@@ -36,7 +36,7 @@
 def make_dense(X):
     """TODO."""
     if issparse(X):
-        XA = X.A if X.ndim == 2 else X.A1
+        XA = X.toarray() if X.ndim == 2 else X.A1
     else:
         XA = X.A1 if isinstance(X, np.matrix) else X
     return np.array(XA)

@@ -799,7 +799,7 @@ def interpret_colorkey(adata, c=None, layer=None, perc=None, use_raw=None):
         if adata.raw is None and use_raw:
             raise ValueError("AnnData object does not have `raw` counts.")
         c = adata.raw.obs_vector(c) if use_raw else adata.obs_vector(c)
-        c = c.A.flatten() if issparse(c) else c
+        c = c.toarray().flatten() if issparse(c) else c
     elif c in adata.var.keys():  # color by observation key
         c = adata.var[c]
     elif np.any([var_key in c for var_key in adata.var.keys()]):
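Only the 2-D `.A` alias changes here; `.A1`, the np.matrix shorthand for a flattened result, is untouched. A sketch of the shape distinction (toy matrix; `ravel()` is the equivalent spelling for the flat result):

```python
import numpy as np
from scipy.sparse import csr_matrix

X = csr_matrix([[1.0, 0.0, 2.0]])

dense = X.toarray()         # keeps the 2-D shape: (1, 3)
flat = X.toarray().ravel()  # 1-D result, what .A1 denotes on np.matrix: (3,)

assert dense.shape == (1, 3) and flat.shape == (3,)
```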
scvelo/plotting/velocity.py (2 changes: 1 addition & 1 deletion)

@@ -169,7 +169,7 @@ def velocity(
         _adata = adata[:, var]
         s, u = _adata.layers[skey], _adata.layers[ukey]
         if issparse(s):
-            s, u = s.A, u.A
+            s, u = s.toarray(), u.toarray()

         # spliced/unspliced phase portrait with steady-state estimate
         ax = pl.subplot(gs[v * nplts])
scvelo/plotting/velocity_graph.py (2 changes: 1 addition & 1 deletion)

@@ -133,7 +133,7 @@ def velocity_graph(

     if groups is not None:
         if issparse(T):
-            T = T.A
+            T = T.toarray()
         T[~groups_to_bool(adata, groups, color)] = 0
         T = csr_matrix(T)
         T.eliminate_zeros()
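This hunk sits inside a densify, mask, re-sparsify round trip. A minimal sketch of that pattern with made-up data (`keep` stands in for `groups_to_bool(...)`):

```python
import numpy as np
from scipy.sparse import csr_matrix, issparse

T = csr_matrix(np.array([[0.0, 0.5], [0.2, 0.0]]))
keep = np.array([True, False])  # stand-in for groups_to_bool(adata, groups, color)

if issparse(T):
    T = T.toarray()
T[~keep] = 0           # zero out rows outside the selected groups
T = csr_matrix(T)
T.eliminate_zeros()    # drop the explicit zeros just introduced
```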
scvelo/preprocessing/moments.py (12 changes: 6 additions & 6 deletions)

@@ -89,12 +89,12 @@ def moments(
         adata.layers["Ms"] = (
             csr_matrix.dot(connectivities, csr_matrix(adata.layers["spliced"]))
             .astype(np.float32)
-            .A
+            .toarray()
         )
         adata.layers["Mu"] = (
             csr_matrix.dot(connectivities, csr_matrix(adata.layers["unspliced"]))
             .astype(np.float32)
-            .A
+            .toarray()
         )
         # if renormalize: normalize_per_cell(adata, layers={'Ms', 'Mu'}, enforce=True)

@@ -130,8 +130,8 @@ def second_order_moments(adata, adjusted=False):
     s, u = csr_matrix(adata.layers["spliced"]), csr_matrix(adata.layers["unspliced"])
     if s.shape[0] == 1:
         s, u = s.T, u.T
-    Mss = csr_matrix.dot(connectivities, s.multiply(s)).astype(np.float32).A
-    Mus = csr_matrix.dot(connectivities, s.multiply(u)).astype(np.float32).A
+    Mss = csr_matrix.dot(connectivities, s.multiply(s)).astype(np.float32).toarray()
+    Mus = csr_matrix.dot(connectivities, s.multiply(u)).astype(np.float32).toarray()
     if adjusted:
         Mss = 2 * Mss - adata.layers["Ms"].reshape(Mss.shape)
         Mus = 2 * Mus - adata.layers["Mu"].reshape(Mus.shape)

@@ -157,7 +157,7 @@ def second_order_moments_u(adata):

     connectivities = get_connectivities(adata)
     u = csr_matrix(adata.layers["unspliced"])
-    Muu = csr_matrix.dot(connectivities, u.multiply(u)).astype(np.float32).A
+    Muu = csr_matrix.dot(connectivities, u.multiply(u)).astype(np.float32).toarray()

     return Muu

@@ -222,5 +222,5 @@ def get_moments(
     else:
         Mx = csr_matrix.dot(connectivities, X)
     if issparse(X):
-        Mx = Mx.astype(np.float32).A
+        Mx = Mx.astype(np.float32).toarray()
     return Mx
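All four hunks densify neighbor-smoothed moments. A hedged sketch of the first-order computation with synthetic stand-ins (in scvelo, `connectivities` comes from the KNN graph via `get_connectivities`):

```python
import numpy as np
from scipy.sparse import csr_matrix

# Synthetic stand-ins: 4 cells, 2 genes, and a row-stochastic neighbor graph.
S = csr_matrix(np.random.poisson(2.0, (4, 2)).astype(np.float32))
C = csr_matrix(np.full((4, 4), 0.25, dtype=np.float32))

# First-order moments: neighborhood-averaged expression, densified for
# storage in adata.layers.
Ms = csr_matrix.dot(C, S).astype(np.float32).toarray()
assert Ms.shape == (4, 2)
```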
scvelo/preprocessing/neighbors.py (4 changes: 2 additions & 2 deletions)

@@ -326,7 +326,7 @@ def fit(self, X, metric="l2", M=16, ef=100, ef_construction=100, random_state=0)
         ef_c, ef = max(ef_construction, self.n_neighbors), max(self.n_neighbors, ef)
         metric = "l2" if metric == "euclidean" else metric

-        X = X.A if issparse(X) else X
+        X = X.toarray() if issparse(X) else X
         ns, dim = X.shape

         knn = hnswlib.Index(space=metric, dim=dim)

@@ -564,7 +564,7 @@ def get_duplicate_cells(data):
     vals = [val for val, count in Counter(lst).items() if count > 1]
    idx_dup = np.where(pd.Series(lst).isin(vals))[0]

-    X_new = np.array(X[idx_dup].A if issparse(X) else X[idx_dup])
+    X_new = np.array(X[idx_dup].toarray() if issparse(X) else X[idx_dup])
     sorted_idx = np.lexsort(X_new.T)
     sorted_data = X_new[sorted_idx, :]
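The second hunk feeds a duplicate-row scan. A small sketch of the lexsort idea on toy data:

```python
import numpy as np

X_new = np.array([[1.0, 2.0],
                  [0.0, 1.0],
                  [1.0, 2.0]])

# Sort rows lexicographically; identical rows become adjacent, so a
# pairwise comparison of consecutive rows exposes the duplicates.
sorted_idx = np.lexsort(X_new.T)
sorted_data = X_new[sorted_idx, :]
dup_mask = np.all(sorted_data[1:] == sorted_data[:-1], axis=1)
print(sorted_idx, dup_mask)  # the two [1., 2.] rows end up adjacent
```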
scvelo/tools/optimization.py (6 changes: 3 additions & 3 deletions)

@@ -12,10 +12,10 @@
 # TODO: Add docstrings
 def get_weight(x, y=None, perc=95):
     """TODO."""
-    xy_norm = np.array(x.A if issparse(x) else x)
+    xy_norm = np.array(x.toarray() if issparse(x) else x)
     if y is not None:
         if issparse(y):
-            y = y.A
+            y = y.toarray()
         xy_norm = xy_norm / np.clip(np.max(xy_norm, axis=0), 1e-3, None)
         xy_norm += y / np.clip(np.max(y, axis=0), 1e-3, None)
     if isinstance(perc, numbers.Number):

@@ -84,7 +84,7 @@ def optimize_NxN(x, y, fit_offset=False, perc=None):
             perc = perc[1]
         weights = get_weight(x, y, perc).astype(bool)
         if issparse(weights):
-            weights = weights.A
+            weights = weights.toarray()
     else:
         weights = None
scvelo/tools/paga.py (4 changes: 2 additions & 2 deletions)

@@ -150,7 +150,7 @@ def compute_transitions(self):
         transitions_conf.eliminate_zeros()

         # remove non-confident direct paths if more confident indirect path is found.
-        T = transitions_conf.A
+        T = transitions_conf.toarray()
         threshold = max(np.nanmin(np.nanmax(T / (T > 0), axis=0)) - 1e-6, 0.01)
         T *= T > threshold
         for i in range(len(T)):

@@ -169,7 +169,7 @@ def compute_transitions(self):
             T_tmp[np.where(T_num[:, i])[0][0], i] = T_max
         from scipy.sparse.csgraph import minimum_spanning_tree

-        T_tmp = np.abs(minimum_spanning_tree(-T_tmp).A) > 0
+        T_tmp = np.abs(minimum_spanning_tree(-T_tmp).toarray()) > 0
         T = T_tmp * T

         transitions_conf = csr_matrix(T)
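`minimum_spanning_tree` returns a sparse matrix, hence the same densification; negating the weights turns the minimum spanning tree into a maximum one over confidences. A toy sketch:

```python
import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree

T_tmp = np.array([[0.0, 0.9, 0.1],
                  [0.9, 0.0, 0.8],
                  [0.1, 0.8, 0.0]])

# MST over the negated weights keeps the strongest connections; the
# result comes back sparse, so it needs .toarray() before the mask.
mask = np.abs(minimum_spanning_tree(-T_tmp).toarray()) > 0
print(mask.astype(int))
```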
scvelo/tools/run.py (10 changes: 5 additions & 5 deletions)

@@ -56,8 +56,8 @@ def convert_to_adata(vlm, basis=None):
     layers = OrderedDict()
     layers["spliced"] = vlm.S_sz.T if hasattr(vlm, "S_sz") else vlm.S.T
     layers["unspliced"] = vlm.U_sz.T if hasattr(vlm, "U_sz") else vlm.U.T
-    if hasattr(vlm, "A") and (vlm.A.T.shape == layers["spliced"].shape):
-        layers["ambiguous"] = vlm.A.T
+    if hasattr(vlm, "A") and (vlm.toarray().T.shape == layers["spliced"].shape):
+        layers["ambiguous"] = vlm.toarray().T

     if hasattr(vlm, "velocity"):
         layers["velocity"] = vlm.velocity.T

@@ -109,12 +109,12 @@ def __init__(self, adata, basis=None):
         self.S = adata.layers["spliced"].T
         self.U = adata.layers["unspliced"].T
         self.S = (
-            np.array(self.S.A, **kwargs)
+            np.array(self.S.toarray(), **kwargs)
             if issparse(self.S)
             else np.array(self.S, **kwargs)
         )
         self.U = (
-            np.array(self.U.A, **kwargs)
+            np.array(self.U.toarray(), **kwargs)
             if issparse(self.U)
             else np.array(self.U, **kwargs)
         )

@@ -146,7 +146,7 @@ def __init__(self, adata, basis=None):
         if "ambiguous" in adata.layers.keys():
             self.A = np.array(adata.layers["ambiguous"].T)
             if issparse(self.A):
-                self.A = self.A.A
+                self.A = self.A.toarray()

         self.ca = {}
         self.ra = {}
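A caveat worth flagging in `convert_to_adata`: `vlm.A` there is the wrapper's dense `ambiguous` attribute (assigned via `np.array(...)` in `__init__` in the third hunk), not a sparse matrix's `.A` alias, so the rewrite to `vlm.toarray()` looks like an over-broad find-and-replace and would raise `AttributeError` whenever the `hasattr(vlm, "A")` guard passes. A minimal reproduction with a hypothetical stand-in class:

```python
import numpy as np

class Vlm:
    """Toy stand-in for the VelocytoLoom-style wrapper in run.py."""
    def __init__(self):
        self.A = np.zeros((3, 2))  # dense "ambiguous" layer, a plain ndarray

vlm = Vlm()
assert hasattr(vlm, "A")  # the guard in convert_to_adata passes
print(vlm.A.T.shape)      # (2, 3): what the pre-PR code accessed
# vlm.toarray()           # would raise AttributeError: no such method
```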
scvelo/tools/terminal_states.py (4 changes: 2 additions & 2 deletions)

@@ -57,7 +57,7 @@ def cell_fate(
     T = transition_matrix(_adata, self_transitions=self_transitions)
     fate = np.linalg.inv(np.eye(_adata.n_obs) - T)
     if issparse(T):
-        fate = fate.A
+        fate = fate.toarray()
     cell_fates = np.array(_adata.obs[groupby][fate.argmax(1)])
     if disconnected_groups is not None:
         idx = _adata.obs[groupby].isin(disconnected_groups)

@@ -123,7 +123,7 @@ def cell_origin(
     T = transition_matrix(_adata, self_transitions=self_transitions, backward=True)
     fate = np.linalg.inv(np.eye(_adata.n_obs) - T)
     if issparse(T):
-        fate = fate.A
+        fate = fate.toarray()
     cell_fates = np.array(_adata.obs[groupby][fate.argmax(1)])
     if disconnected_groups is not None:
         idx = _adata.obs[groupby].isin(disconnected_groups)
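Both hunks densify a fundamental-matrix computation: the row-wise argmax of (I - T)^-1 picks each cell's most likely terminal group. A toy sketch (the 0.9 damping is only to keep the toy I - T invertible, not part of scvelo's code):

```python
import numpy as np

T = np.array([[0.5, 0.5],
              [0.0, 1.0]])  # state 1 is absorbing

# Fundamental-matrix-style accumulation: (I - T)^-1 sums expected visits,
# so the largest row entry points at the likeliest end state.
fate = np.linalg.inv(np.eye(2) - 0.9 * T)
print(fate.argmax(1))  # both rows point at the absorbing state
```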
scvelo/tools/transition_matrix.py (2 changes: 1 addition & 1 deletion)

@@ -100,7 +100,7 @@ def transition_matrix(
         graph = graph.multiply(basis_graph)

     if self_transitions:
-        confidence = graph.max(1).A.flatten()
+        confidence = graph.max(1).toarray().flatten()
         ub = np.percentile(confidence, 98)
         self_prob = np.clip(ub - confidence, 0, 1)
         graph.setdiag(self_prob)
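Row-wise `max` on a sparse matrix returns a sparse (COO) column vector rather than an ndarray, which is why the alias shows up here too. A quick check:

```python
import numpy as np
from scipy.sparse import csr_matrix

graph = csr_matrix(np.array([[0.0, 0.7], [0.3, 0.0]]))

# Sparse row-wise max yields a sparse (n, 1) result, so it gets the same
# .A -> .toarray() treatment before flattening.
confidence = graph.max(1).toarray().flatten()
assert confidence.tolist() == [0.7, 0.3]
```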
scvelo/tools/utils.py (10 changes: 5 additions & 5 deletions)

@@ -33,7 +33,7 @@ def mean(x, axis=0):
 # TODO: Add docstrings
 def make_dense(X):
     """TODO."""
-    XA = X.A if issparse(X) and X.ndim == 2 else X.A1 if issparse(X) else X
+    XA = X.toarray() if issparse(X) and X.ndim == 2 else X.A1 if issparse(X) else X
     if XA.ndim == 2:
         XA = XA[0] if XA.shape[0] == 1 else XA[:, 0] if XA.shape[1] == 1 else XA
     return np.array(XA)

@@ -327,8 +327,8 @@ def cutoff_small_velocities(
     x = adata.layers["spliced"] if use_raw else adata.layers["Ms"]
     y = adata.layers["unspliced"] if use_raw else adata.layers["Mu"]

-    x_max = x.max(0).A[0] if issparse(x) else x.max(0)
-    y_max = y.max(0).A[0] if issparse(y) else y.max(0)
+    x_max = x.max(0).toarray()[0] if issparse(x) else x.max(0)
+    y_max = y.max(0).toarray()[0] if issparse(y) else y.max(0)

     xy_norm = x / np.clip(x_max, 1e-3, None) + y / np.clip(y_max, 1e-3, None)
     W = xy_norm >= np.percentile(xy_norm, 98, axis=0) * frac_of_max

@@ -449,9 +449,9 @@ def vcorrcoef(X, y, mode="pearsons", axis=-1):
         Which correlation metric to use.
     """
     if issparse(X):
-        X = np.array(X.A)
+        X = np.array(X.toarray())
     if issparse(y):
-        y = np.array(y.A)
+        y = np.array(y.toarray())
     if axis == 0:
         if X.ndim > 1:
             X = np.array(X.T)
scvelo/tools/velocity_embedding.py (4 changes: 2 additions & 2 deletions)

@@ -158,7 +158,7 @@ def velocity_embedding(
     T.eliminate_zeros()

     densify = adata.n_obs < 1e4
-    TA = T.A if densify else None
+    TA = T.toarray() if densify else None

     with warnings.catch_warnings():
         warnings.simplefilter("ignore")

@@ -179,7 +179,7 @@ def velocity_embedding(
         )
         delta = T.dot(X[:, vgenes]) - X[:, vgenes]
         if issparse(delta):
-            delta = delta.A
+            delta = delta.toarray()
         cos_proj = (V * delta).sum(1) / l2_norm(delta)
         V_emb *= np.clip(cos_proj[:, None] * 10, 0, 1)
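The first hunk keeps scvelo's size gate: a dense n_obs x n_obs transition matrix is only materialized below 10,000 cells. A sketch of the trade-off (numbers illustrative):

```python
from scipy.sparse import random as sparse_random

n_obs = 2_000
T = sparse_random(n_obs, n_obs, density=0.001, format="csr")

# A dense copy costs n_obs**2 * 8 bytes (about 32 MB at 2k cells in
# float64), so it is only worth it for fast row access on small datasets.
TA = T.toarray() if n_obs < 1e4 else None
```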
scvelo/tools/velocity_graph.py (6 changes: 3 additions & 3 deletions)

@@ -69,12 +69,12 @@ def __init__(
         xkey = xkey if xkey in adata.layers.keys() else "spliced"

         X = np.array(
-            adata.layers[xkey].A[:, subset]
+            adata.layers[xkey].toarray()[:, subset]
             if issparse(adata.layers[xkey])
             else adata.layers[xkey][:, subset]
         )
         V = np.array(
-            adata.layers[vkey].A[:, subset]
+            adata.layers[vkey].toarray()[:, subset]
             if issparse(adata.layers[vkey])
             else adata.layers[vkey][:, subset]
         )

@@ -209,7 +209,7 @@ def compute_cosines(
         )
         self.uncertainties.eliminate_zeros()

-        confidence = self.graph.max(1).A.flatten()
+        confidence = self.graph.max(1).toarray().flatten()
         self.self_prob = np.clip(np.percentile(confidence, 98) - confidence, 0, 1)

     def _compute_cosines(self, obs_idx, queue):
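Minor note on the first hunk: densifying before the column subset materializes the whole layer, while subsetting the sparse matrix first touches less memory and gives the same result. A toy check:

```python
import numpy as np
from scipy.sparse import csr_matrix

layer = csr_matrix(np.arange(12, dtype=float).reshape(3, 4))
subset = np.array([True, False, True, False])

a = layer.toarray()[:, subset]  # densify, then subset (as merged here)
b = layer[:, subset].toarray()  # subset, then densify (cheaper on big layers)
np.testing.assert_array_equal(a, b)
```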
tests/core/test_anndata.py (4 changes: 2 additions & 2 deletions)

@@ -126,9 +126,9 @@ def test_cleanup_all(

     if dense:
         if layer is None:
-            adata.X = adata.X.A
+            adata.X = adata.X.toarray()
         else:
-            adata.layers[layer] = adata.layers[layer].A
+            adata.layers[layer] = adata.layers[layer].toarray()
     returned_adata = cleanup(adata=adata, clean="all", inplace=inplace)

     if not inplace:
tests/preprocessing/test_moments.py (20 changes: 11 additions & 9 deletions)

@@ -42,9 +42,9 @@ def test_first_moments(

     if dense:
         if layer is None:
-            adata.X = adata.X.A
+            adata.X = adata.X.toarray()
         else:
-            adata.layers[layer] = adata.layers[layer].A
+            adata.layers[layer] = adata.layers[layer].toarray()

     first_order_moment = get_moments(adata=adata, layer=layer, mode=mode)
     assert isinstance(first_order_moment, np.ndarray)

@@ -69,9 +69,9 @@ def test_second_moments(

     if dense:
         if layer is None:
-            adata.X = adata.X.A
+            adata.X = adata.X.toarray()
         else:
-            adata.layers[layer] = adata.layers[layer].A
+            adata.layers[layer] = adata.layers[layer].toarray()

     second_order_moment = get_moments(
         adata=adata, layer=layer, mode=mode, second_order=True, centered=False

@@ -98,9 +98,9 @@ def test_passing_array_for_layer(

     if dense:
         if layer is None:
-            adata.X = adata.X.A
+            adata.X = adata.X.toarray()
         else:
-            adata.layers[layer] = adata.layers[layer].A
+            adata.layers[layer] = adata.layers[layer].toarray()

     if layer is None:
         first_order_moment = get_moments(adata=adata, layer=adata.X, mode=mode)

@@ -181,13 +181,15 @@ def _compare_adatas(self, adata_1, adata_2):
         assert set(adata_1.obsp) == {"distances", "connectivities"}
         assert issparse(adata_1.obsp["connectivities"])
         np.testing.assert_almost_equal(
-            adata_1.obsp["connectivities"].A,
-            adata_2.obsp["connectivities"].A,
+            adata_1.obsp["connectivities"].toarray(),
+            adata_2.obsp["connectivities"].toarray(),
             decimal=4,
         )
         assert issparse(adata_1.obsp["distances"])
         np.testing.assert_almost_equal(
-            adata_1.obsp["distances"].A, adata_2.obsp["distances"].A, decimal=3
+            adata_1.obsp["distances"].toarray(),
+            adata_2.obsp["distances"].toarray(),
+            decimal=3,
         )

         # Check `.uns` is unchanged
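Comparing sparse matrices elementwise is easiest through dense copies, which is exactly what these test hunks do. A standalone miniature (synthetic matrices):

```python
import numpy as np
from scipy.sparse import csr_matrix

a = csr_matrix(np.array([[0.0, 1.00001], [2.0, 0.0]]))
b = csr_matrix(np.array([[0.0, 1.00002], [2.0, 0.0]]))

# Densify before comparing: sparse formats can differ in internal layout
# even when the represented values are (almost) equal.
np.testing.assert_almost_equal(a.toarray(), b.toarray(), decimal=4)
```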