Add two distance metrics, three-way comparison and bootstrapping #608

Merged

merged 36 commits on Jun 24, 2024

Changes from all commits (36 commits)
fd21dc1
add two distance metrics
wxicu May 26, 2024
3af7d89
add obsm_key param to distance test
wxicu May 26, 2024
3fe911b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 26, 2024
6d419c3
add agg fct
wxicu Jun 2, 2024
ca86025
speed up tests
wxicu Jun 3, 2024
0830535
Merge branch 'main' into distance
wxicu Jun 3, 2024
9fd4c2b
add type
wxicu Jun 3, 2024
fc71eae
add description
wxicu Jun 3, 2024
09e5fea
Update pertpy/tools/_distances/_distances.py
wxicu Jun 5, 2024
ad23ca6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 5, 2024
dc74884
Update pertpy/tools/_distances/_distances.py
wxicu Jun 5, 2024
c774cd2
Update pertpy/tools/_distances/_distances.py
wxicu Jun 5, 2024
d413d67
Update pertpy/tools/_distances/_distances.py
wxicu Jun 5, 2024
b7f2cf7
Update pertpy/tools/_distances/_distances.py
wxicu Jun 5, 2024
e71f81c
Update pertpy/tools/_distances/_distances.py
wxicu Jun 5, 2024
edaa6e6
Update pertpy/tools/_distances/_distances.py
wxicu Jun 5, 2024
317cfd5
update code
wxicu Jun 6, 2024
47b4134
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 6, 2024
d261410
fix drug
wxicu Jun 6, 2024
4fd29a0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 6, 2024
30baefe
add bootstrapping and metrics_3g
wxicu Jun 7, 2024
2c8127c
speed up tests,
wxicu Jun 7, 2024
57b14c3
remove test classes
wxicu Jun 10, 2024
78d00fa
drop test classes
wxicu Jun 10, 2024
052fd00
update compare_de
wxicu Jun 12, 2024
3a8eac6
correct the comments
wxicu Jun 12, 2024
63ed17a
speed tests
wxicu Jun 13, 2024
f9e0d36
speed up tests
wxicu Jun 13, 2024
2e65f9d
split metrics_3g
wxicu Jun 18, 2024
2e7acf3
fix pre-commit
wxicu Jun 18, 2024
69163ff
pin numpy <2
wxicu Jun 19, 2024
67c54be
unpin numpy
wxicu Jun 20, 2024
6e32f37
speed up mahalanobis distance
wxicu Jun 20, 2024
620e645
use scipy to calculate mahalanobis distance
wxicu Jun 20, 2024
10d3483
rename DGE to DGEEVAL
wxicu Jun 23, 2024
4a07252
Merge branch 'main' into distance
wxicu Jun 23, 2024
17 changes: 15 additions & 2 deletions pertpy/tools/__init__.py
@@ -3,18 +3,31 @@
from pertpy.tools._coda._sccoda import Sccoda
from pertpy.tools._coda._tasccoda import Tasccoda
from pertpy.tools._dialogue import Dialogue
from pertpy.tools._differential_gene_expression import EdgeR, PyDESeq2, Statsmodels, TTest, WilcoxonTest
from pertpy.tools._differential_gene_expression import (
DGEEVAL,
EdgeR,
PyDESeq2,
Statsmodels,
TTest,
WilcoxonTest,
)
from pertpy.tools._distances._distance_tests import DistanceTest
from pertpy.tools._distances._distances import Distance
from pertpy.tools._enrichment import Enrichment
from pertpy.tools._milo import Milo
from pertpy.tools._mixscape import Mixscape
from pertpy.tools._perturbation_space._clustering import ClusteringSpace
from pertpy.tools._perturbation_space._comparison import PerturbationComparison
from pertpy.tools._perturbation_space._discriminator_classifiers import (
LRClassifierSpace,
MLPClassifierSpace,
)
from pertpy.tools._perturbation_space._simple import CentroidSpace, DBSCANSpace, KMeansSpace, PseudobulkSpace
from pertpy.tools._perturbation_space._simple import (
CentroidSpace,
DBSCANSpace,
KMeansSpace,
PseudobulkSpace,
)
from pertpy.tools._scgen import Scgen

__all__ = [
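For context (not part of the diff): with these re-exports in place, the new classes would typically be reached through pertpy's usual `pt.tl` namespace. A minimal, hypothetical import sketch — the `pt.tl` alias and the "edistance" metric name are assumptions, not shown in this PR:

import pertpy as pt

dge_eval = pt.tl.DGEEVAL()                                 # new DE-comparison helper exported above
distance = pt.tl.Distance("edistance", obsm_key="X_pca")   # Distance now takes obsm_key as a keyword (see diff below)
comparison = pt.tl.PerturbationComparison()                # re-exported perturbation-space comparison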
1 change: 1 addition & 0 deletions pertpy/tools/_differential_gene_expression/__init__.py
@@ -1,4 +1,5 @@
from ._base import ContrastType, LinearModelBase, MethodBase
from ._dge_comparison import DGEEVAL
from ._edger import EdgeR
from ._pydeseq2 import PyDESeq2
from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
86 changes: 86 additions & 0 deletions pertpy/tools/_differential_gene_expression/_dge_comparison.py
@@ -0,0 +1,86 @@
import numpy as np
import pandas as pd
from anndata import AnnData


class DGEEVAL:
def compare(
self,
adata: AnnData | None = None,
de_key1: str = None,
de_key2: str = None,
de_df1: pd.DataFrame | None = None,
de_df2: pd.DataFrame | None = None,
shared_top: int = 100,
) -> dict[str, float]:
"""Compare two differential expression analyses.

Compare two sets of DE results and evaluate their similarity by the overlap of the top DEGs and
by the correlation of their scores and adjusted p-values.

Args:
adata: AnnData object containing DE results in `uns`. Required if `de_key1` and `de_key2` are used.
de_key1: Key for DE results in `adata.uns`, e.g., output of `tl.rank_genes_groups`.
de_key2: Another key for DE results in `adata.uns`, e.g., output of `tl.rank_genes_groups`.
de_df1: DataFrame containing DE results, e.g. output from pertpy differential gene expression interface.
de_df2: DataFrame containing DE results, e.g. output from pertpy differential gene expression interface.
shared_top: The number of top DEGs whose intersection proportion is computed.

"""
if (de_key1 or de_key2) and (de_df1 is not None or de_df2 is not None):
raise ValueError(
"Please provide either both `de_key1` and `de_key2` with `adata`, or `de_df1` and `de_df2`, but not both."
)

if de_df1 is None and de_df2 is None: # use keys
if not de_key1 or not de_key2:
raise ValueError("Both `de_key1` and `de_key2` must be provided together if using `adata`.")

else: # use dfs
if de_df1 is None or de_df2 is None:
raise ValueError("Both `de_df1` and `de_df2` must be provided together if using DataFrames.")

if de_key1:
if not adata:
raise ValueError("`adata` should be provided with `de_key1` and `de_key2`. ")
assert all(
k in adata.uns for k in [de_key1, de_key2]
), "Provided `de_key1` and `de_key2` must exist in `adata.uns`."
vars = adata.var_names

if de_df1 is not None:
for df in (de_df1, de_df2):
if not {"variable", "log_fc", "adj_p_value"}.issubset(df.columns):
raise ValueError("Each DataFrame must contain columns: 'variable', 'log_fc', and 'adj_p_value'.")

assert set(de_df1["variable"]) == set(de_df2["variable"]), "Variables in both dataframes must match."
vars = de_df1["variable"].sort_values()

shared_top = min(shared_top, len(vars))
vars_ranks = np.arange(1, len(vars) + 1)
results = pd.DataFrame(index=vars)
top_names = []

if de_key1 and de_key2:
for i, k in enumerate([de_key1, de_key2]):
label = adata.uns[k]["names"].dtype.names[0]
srt_idx = np.argsort(adata.uns[k]["names"][label])
results[f"scores_{i}"] = adata.uns[k]["scores"][label][srt_idx]
results[f"pvals_adj_{i}"] = adata.uns[k]["pvals_adj"][label][srt_idx]
results[f"ranks_{i}"] = vars_ranks[srt_idx]
top_names.append(adata.uns[k]["names"][label][:shared_top])
else:
for i, df in enumerate([de_df1, de_df2]):
srt_idx = np.argsort(df["variable"])
results[f"scores_{i}"] = df["log_fc"].values[srt_idx]
results[f"pvals_adj_{i}"] = df["adj_p_value"].values[srt_idx]
results[f"ranks_{i}"] = vars_ranks[srt_idx]
top_names.append(df["variable"][:shared_top])

metrics = {}
metrics["shared_top_genes"] = len(set(top_names[0]).intersection(top_names[1])) / shared_top
metrics["scores_corr"] = results["scores_0"].corr(results["scores_1"], method="pearson")
metrics["pvals_adj_corr"] = results["pvals_adj_0"].corr(results["pvals_adj_1"], method="pearson")
metrics["scores_ranks_corr"] = results["ranks_0"].corr(results["ranks_1"], method="spearman")

return metrics
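
To illustrate the new class, here is a minimal sketch of calling `DGEEVAL.compare` on two DE result tables. The DataFrames are toy data; only the required columns ("variable", "log_fc", "adj_p_value") and the returned metric names come from the code above.

import pandas as pd
from pertpy.tools import DGEEVAL

# Two toy DE result tables over the same genes, assumed to be ordered by
# significance, since the top `shared_top` rows of each table are compared.
de_df1 = pd.DataFrame({
    "variable": ["geneA", "geneB", "geneC"],
    "log_fc": [2.1, -1.3, 0.4],
    "adj_p_value": [0.001, 0.02, 0.6],
})
de_df2 = pd.DataFrame({
    "variable": ["geneA", "geneB", "geneC"],
    "log_fc": [1.9, -1.1, 0.2],
    "adj_p_value": [0.002, 0.03, 0.7],
})

metrics = DGEEVAL().compare(de_df1=de_df1, de_df2=de_df2, shared_top=2)
# metrics contains: shared_top_genes, scores_corr, pvals_adj_corr, scores_ranks_corr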
13 changes: 9 additions & 4 deletions pertpy/tools/_distances/_distance_tests.py
@@ -66,11 +66,14 @@ def __init__(
self.alpha = alpha
self.correction = correction
self.cell_wise_metric = (
cell_wise_metric if cell_wise_metric else Distance(self.metric, self.obsm_key).cell_wise_metric
cell_wise_metric if cell_wise_metric else Distance(self.metric, obsm_key=self.obsm_key).cell_wise_metric
)

self.distance = Distance(
self.metric, layer_key=self.layer_key, obsm_key=self.obsm_key, cell_wise_metric=self.cell_wise_metric
self.metric,
layer_key=self.layer_key,
obsm_key=self.obsm_key,
cell_wise_metric=self.cell_wise_metric,
)

def __call__(
@@ -176,7 +179,8 @@ def test_xy(self, adata: AnnData, groupby: str, contrast: str, show_progressbar:
# Evaluate the test
# count times shuffling resulted in larger distance
comparison_results = np.array(
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
dtype=int,
)
n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
pvalues = n_failures / self.n_perms
@@ -284,7 +288,8 @@ def test_precomputed(self, adata: AnnData, groupby: str, contrast: str, verbose:
# Evaluate the test
# count times shuffling resulted in larger distance
comparison_results = np.array(
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
dtype=int,
)
n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
pvalues = n_failures / self.n_perms
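
The two re-wrapped hunks above only reformat the permutation-test bookkeeping; the logic is unchanged. As a standalone sketch with toy arrays (not pertpy objects): the p-value is the fraction of permutations whose distance exceeds the observed distance, clipped below at 1/n_perms so it is never exactly zero.

import numpy as np

observed = np.array([3.2, 1.1])          # observed distance per contrast group
permuted = np.array([[2.0, 3.5, 2.9],    # distances from 3 permutations, per group
                     [1.4, 0.9, 1.3]])
n_perms = permuted.shape[1]

# Count permutations with a larger distance than observed ("failures"),
# never letting the count drop below 1, then convert to p-values.
n_failures = np.clip((permuted > observed[:, None]).sum(axis=1), 1, np.inf)
pvalues = n_failures / n_perms           # -> [0.333..., 0.666...]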