Skip to content

Commit

Permalink
Metadata GDSC annotation improvements (#625)
Browse files Browse the repository at this point in the history
* Small metadata cell line fixes

* Check that queried GDSC dataset is either 1 or 2

* gdsc_dataset must be specified as a string

* Docstring update
  • Loading branch information
Lilly-May authored Jun 10, 2024
1 parent a51fd2d commit c69acea
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 10 deletions.
18 changes: 9 additions & 9 deletions pertpy/metadata/_cell_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,8 @@ def annotate(
if query_id == "DepMap_ID":
query_id = "stripped_cell_line_name"
logger.error(
"`stripped_cell_line_name` is used as reference and query identifier ",
" to annotate cell line metadata from Cancerrxgene. "
"Ensure that stripped cell line names are available in 'adata.obs.' ",
"or use the DepMap as `cell_line_source` to annotate the cell line first ",
"`stripped_cell_line_name` is used as reference and query identifier to annotate cell line metadata from Cancerrxgene. "
"Ensure that stripped cell line names are available in 'adata.obs.' or use the DepMap as `cell_line_source` to annotate the cell line first."
)
if self.cancerxgene is None:
self._download_cell_line(cell_line_source="Cancerrxgene")
Expand Down Expand Up @@ -485,7 +483,7 @@ def annotate_from_gdsc(
reference_id: Literal["cell_line_name", "sanger_model_id", "cosmic_id"] = "cell_line_name",
query_perturbation: str = "perturbation",
reference_perturbation: Literal["drug_name", "drug_id"] = "drug_name",
gdsc_dataset: Literal[1, 2] = 1,
gdsc_dataset: Literal["gdsc_1", "gdsc_2"] = "gdsc_1",
verbosity: int | str = 5,
copy: bool = False,
) -> AnnData:
Expand All @@ -500,7 +498,7 @@ def annotate_from_gdsc(
reference_id: The type of cell line identifier in the metadata, cell_line_name, sanger_model_id or cosmic_id.
query_perturbation: The column of `.obs` with perturbation information.
reference_perturbation: The type of perturbation in the metadata, drug_name or drug_id.
gdsc_dataset: The GDSC dataset, 1 or 2.
gdsc_dataset: The GDSC dataset, 1 or 2, specified as 'gdsc_1' or 'gdsc_2'.
The GDSC1 dataset updates previous releases with additional drug screening data from the
Sanger Institute and Massachusetts General Hospital.
It covers 970 Cell lines and 403 Compounds with 333292 IC50s.
Expand Down Expand Up @@ -528,14 +526,16 @@ def annotate_from_gdsc(
"This ensures that the required query ID is included in your data."
)
# Lazily download the GDSC data
if gdsc_dataset == 1:
if gdsc_dataset == "gdsc_1":
if self.drug_response_gdsc1 is None:
self._download_gdsc(gdsc_dataset=1)
gdsc_data = self.drug_response_gdsc1
else:
elif gdsc_dataset == "gdsc_2":
if self.drug_response_gdsc2 is None:
self._download_gdsc(gdsc_dataset=2)
gdsc_data = self.drug_response_gdsc2
else:
raise ValueError("The GDSC dataset specified in `gdsc_dataset` must be either 'gdsc_1' or 'gdsc_2'.")

identifier_num_all = len(adata.obs[query_id].unique())
not_matched_identifiers = list(set(adata.obs[query_id]) - set(gdsc_data[reference_id]))
Expand All @@ -552,7 +552,7 @@ def annotate_from_gdsc(
adata.obs = (
adata.obs.reset_index()
.set_index([query_id, query_perturbation])
.assign(ln_ic50=self.drug_response_gdsc1.set_index([reference_id, reference_perturbation]).ln_ic50)
.assign(ln_ic50=gdsc_data.set_index([reference_id, reference_perturbation]).ln_ic50)
.reset_index()
.set_index(old_index_name)
)
Expand Down
3 changes: 2 additions & 1 deletion pertpy/tools/_differential_gene_expression/_statsmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def fit(
Examples:
>>> import statsmodels.api as sm
>>> model = StatsmodelsDE(adata, design="~condition")
>>> import pertpy as pt
>>> model = pt.tl.Statsmodels(adata, design="~condition")
>>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
>>> results = model.test_contrasts(np.array([0, 1]))
"""
Expand Down

0 comments on commit c69acea

Please sign in to comment.