Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/kbdev 1115 get cancer genes #104

Merged
merged 5 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions graphkb/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@


# Source names used to restrict statement queries to a given source.
ONCOKB_SOURCE_NAME = "oncokb"
TSO500_SOURCE_NAME = "tso500"
# Relevance vocabulary term names used when fetching gene lists.
ONCOGENE = "oncogenic"
TUMOUR_SUPPRESSIVE = "tumour suppressive"
CANCER_GENE = "cancer gene"
# NOTE(review): presumably the variant type names treated as fusions - confirm against callers.
FUSION_NAMES = ["structural variant", "fusion"]

# NOTE(review): presumably source names skipped for pharmacogenomic lookups - confirm against callers.
PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST = ["cancer genome interpreter", "civic"]
Expand Down
40 changes: 30 additions & 10 deletions graphkb/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from . import GraphKBConnection
from .constants import (
BASE_THERAPEUTIC_TERMS,
CANCER_GENE,
CHROMOSOMES,
FAILED_REVIEW_STATUS,
GENE_RETURN_PROPERTIES,
Expand All @@ -12,6 +13,7 @@
PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST,
PREFERRED_GENE_SOURCE,
RELEVANCE_BASE_TERMS,
TSO500_SOURCE_NAME,
TUMOUR_SUPPRESSIVE,
)
from .match import get_equivalent_features
Expand All @@ -20,25 +22,29 @@
from .vocab import get_terms_set


def _get_oncokb_gene_list(
conn: GraphKBConnection, relevance: str, ignore_cache: bool = False
def _get_tumourigenesis_genes_list(
conn: GraphKBConnection,
relevance: str,
sources: List[str],
ignore_cache: bool = False,
) -> List[Ontology]:
source = conn.get_source(ONCOKB_SOURCE_NAME)["@rid"]

statements = cast(
List[Statement],
conn.query(
{
"target": "Statement",
"filters": [
{"source": source},
{"relevance": {"target": "Vocabulary", "filters": {"name": relevance}}},
],
"filters": {
"AND": [
{"source": {"target": "Source", "filters": {"name": sources}}},
{"relevance": {"target": "Vocabulary", "filters": {"name": relevance}}},
]
},
"returnProperties": [f"subject.{prop}" for prop in GENE_RETURN_PROPERTIES],
},
ignore_cache=ignore_cache,
),
)

genes: Dict[str, Ontology] = {}

for statement in statements:
Expand All @@ -58,7 +64,7 @@ def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]:
Returns:
gene (Feature) records
"""
return _get_oncokb_gene_list(conn, ONCOGENE)
return _get_tumourigenesis_genes_list(conn, ONCOGENE, [ONCOKB_SOURCE_NAME])


def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]:
Expand All @@ -70,7 +76,21 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]:
Returns:
gene (Feature) records
"""
return _get_oncokb_gene_list(conn, TUMOUR_SUPPRESSIVE)
return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, [ONCOKB_SOURCE_NAME])


def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]:
    """Return the genes GraphKB lists as cancer genes for OncoKB and TSO500.

    Delegates to ``_get_tumourigenesis_genes_list`` with the cancer gene
    relevance term and the OncoKB and TSO500 source names.

    Args:
        conn: the graphkb connection object

    Returns:
        gene (Feature) records
    """
    cancer_gene_sources = [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME]
    return _get_tumourigenesis_genes_list(conn, CANCER_GENE, cancer_gene_sources)


def get_therapeutic_associated_genes(graphkb_conn: GraphKBConnection) -> List[Ontology]:
Expand Down
17 changes: 17 additions & 0 deletions tests/test_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from graphkb import GraphKBConnection
from graphkb.genes import (
get_cancer_genes,
get_cancer_predisposition_info,
get_gene_information,
get_genes_from_variant_types,
Expand All @@ -22,6 +23,7 @@

# Lowercase gene names the tests check for presence in, or absence from, each gene list.
CANONICAL_ONCOGENES = ["kras", "nras", "alk"]
# TS: tumour suppressors.
CANONICAL_TS = ["cdkn2a", "tp53"]
# CG: cancer genes.
CANONICAL_CG = ["ercc1", "fanci", "h2bc4", "h2bc17", "acvr1b"]
# NOTE(review): the tests using the three lists below are not visible here - confirm their usage.
CANONICAL_FUSION_GENES = ["alk", "ewsr1", "fli1"]
CANONICAL_STRUCTURAL_VARIANT_GENES = ["brca1", "dpyd", "pten"]
CANNONICAL_THERAPY_GENES = ["erbb2", "brca2", "egfr"]
Expand Down Expand Up @@ -112,6 +114,8 @@ def test_oncogene(conn):
assert gene in names
for gene in CANONICAL_TS:
assert gene not in names
for gene in CANONICAL_CG:
assert gene not in names


def test_tumour_supressors(conn):
Expand All @@ -121,6 +125,19 @@ def test_tumour_supressors(conn):
assert gene in names
for gene in CANONICAL_ONCOGENES:
assert gene not in names
for gene in CANONICAL_CG:
assert gene not in names


def test_cancer_genes(conn):
    """Check the cancer gene list against the canonical gene name lists.

    Every canonical cancer gene must be present, and no canonical tumour
    suppressor or oncogene may appear.
    """
    gene_names = {record["name"] for record in get_cancer_genes(conn)}

    # Presence: every canonical cancer gene is among the returned names.
    assert gene_names.issuperset(CANONICAL_CG)
    # Absence: neither of the other canonical lists overlaps the returned names.
    assert gene_names.isdisjoint(CANONICAL_TS)
    assert gene_names.isdisjoint(CANONICAL_ONCOGENES)


def test_get_pharmacogenomic_info(conn):
Expand Down