Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finish extending transcript exon model #70

Merged
merged 12 commits on Oct 3, 2023
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Cache with transcript/protein pickle files
.genophenocorr_cache

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
7 changes: 6 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import doctest
import os
import sys

Expand Down Expand Up @@ -175,13 +176,17 @@
doctest_path = [genophenocorr_src]
doctest_test_doctest_blocks = ""

# Nothing special here
doctest_global_setup = """
# For printing data frames "as is".
import pandas as pd
pd.set_option('expand_frame_repr', False)
"""

doctest_default_flags = (doctest.REPORT_ONLY_FIRST_FAILURE
| doctest.ELLIPSIS
| doctest.IGNORE_EXCEPTION_DETAIL
| doctest.DONT_ACCEPT_TRUE_FOR_1)

# -- Intersphinx setup --------------------------------------------------------
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ We can then view the data using the list commands.
>>> sorted(cohort.list_all_phenotypes())
[('HP:0001166', 14), ('HP:0001250', 20), ('HP:0001257', 17)]
>>> sorted(cohort.list_all_variants())
[('HetVar1', 13), ('HetVar2', 11), ('HomVar1', 3), ('HomVar2', 2)]
[('1_281_A/G', 16), ('1_361_TTC/T', 13)]
>>> sorted(cohort.list_all_proteins())
[('NP_09876.5', 26)]
>>> tx_dict = cohort.list_data_by_tx('NM_1234.5')
>>> sorted(tx_dict['NM_1234.5'].items())
[('frameshift_variant', 2), ('missense_variant', 2)]
[('frameshift_variant', 1), ('missense_variant', 1)]

Using the counts, we can choose and run what analyses we want.
For instance, we can partition the patients into two groups based on presence/absence of a *frameshift* variant:
Expand Down
7 changes: 4 additions & 3 deletions docs/user-guide/input-data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ For the purpose of this example, we will use a folder `simple_cohort` with 5 exa

.. doctest:: input-data

>>> simple_cohort_path = 'data/simple_cohort'
>>> import os
>>> simple_cohort_path = os.path.join(os.getcwd(), 'data', 'simple_cohort')

Here we walk the file system, load all phenopacket JSON files, and transform the phenopackets into instances of
:class:`genophenocorr.model.Patient`:
Expand All @@ -76,8 +77,8 @@ Here we walk the file system, load all phenopacket JSON files, and transform the
... pp_path = os.path.join(dirpath, filename)
... with open(pp_path) as fh:
... pp = Parse(fh.read(), Phenopacket())
... patient = patient_creator.create_patient(pp)
... patients.append(patient)
... patient = patient_creator.create_patient(pp)
... patients.append(patient)


>>> f'Loaded {len(patients)} phenopackets'
Expand Down
35 changes: 22 additions & 13 deletions src/genophenocorr/analysis/predicate/_all_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import hpotk

from genophenocorr.constants import VariantEffect
from genophenocorr.model import Patient, FeatureType
from genophenocorr.model import Patient, FeatureType, Genotype
from ._api import PolyPredicate, PatientCategory


Expand Down Expand Up @@ -95,7 +95,7 @@ def __init__(self, transcript:str) -> None:
def categories(self) -> typing.Sequence[PatientCategory]:
return HETEROZYGOUS, HOMOZYGOUS, NO_VARIANT

def test(self, patient: Patient, query:VariantEffect) -> typing.Optional[PatientCategory]:
def test(self, patient: Patient, query: VariantEffect) -> typing.Optional[PatientCategory]:
if not isinstance(patient, Patient):
raise ValueError(f"patient must be type Patient but was type {type(patient)}")
if not isinstance(query, VariantEffect):
Expand All @@ -109,11 +109,13 @@ def test(self, patient: Patient, query:VariantEffect) -> typing.Optional[Patient
vars.add(var)
if len(vars) == 1:
for v in vars:
if v.genotype == "heterozygous":
gt = v.genotype_for_sample(patient.patient_id)
if gt == Genotype.HETEROZYGOUS:
return HETEROZYGOUS
elif v.genotype == "homozygous":
elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
return HOMOZYGOUS
else:
# TODO - is this really what we want to return here?
return HETEROZYGOUS
elif len(vars) > 1:
return HOMOZYGOUS
Expand All @@ -135,16 +137,17 @@ def test(self, patient: Patient, query: str) -> typing.Optional[PatientCategory]
raise ValueError(f"query must be type string but was type {type(query)}")
vars = set()
for var in patient.variants:
#print(f"{var.variant_string} == {query}")
if var.variant_string == query:
vars.add(var)
if len(vars) == 1:
for v in vars:
if v.genotype == "heterozygous":
gt = v.genotype_for_sample(patient.patient_id)
if gt == Genotype.HETEROZYGOUS:
return HETEROZYGOUS
elif v.genotype == "homozygous":
elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
return HOMOZYGOUS
else:
# TODO - is this really what we want to return here?
return HETEROZYGOUS
elif len(vars) > 1:
return HOMOZYGOUS
Expand Down Expand Up @@ -173,11 +176,13 @@ def test(self, patient: Patient, query: int) -> typing.Optional[PatientCategory]
vars.add(var)
if len(vars) == 1:
for v in vars:
if v.genotype == "heterozygous":
gt = v.genotype_for_sample(patient.patient_id)
if gt == Genotype.HETEROZYGOUS:
return HETEROZYGOUS
elif v.genotype == "homozygous":
elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
return HOMOZYGOUS
else:
# TODO - is this really what we want to return here?
return HETEROZYGOUS
elif len(vars) > 1:
return HOMOZYGOUS
Expand Down Expand Up @@ -209,11 +214,13 @@ def test(self, patient: Patient, query:FeatureType) -> typing.Optional[PatientCa
vars.add(var)
if len(vars) == 1:
for v in vars:
if v.genotype == "heterozygous":
gt = v.genotype_for_sample(patient.patient_id)
if gt == Genotype.HETEROZYGOUS:
return HETEROZYGOUS
elif v.genotype == "homozygous":
elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
return HOMOZYGOUS
else:
# TODO - is this really what we want to return here?
return HETEROZYGOUS
elif len(vars) > 1:
return HOMOZYGOUS
Expand Down Expand Up @@ -251,11 +258,13 @@ def test(self, patient: Patient, query: str) -> typing.Optional[PatientCategory]
vars.add(var)
if len(vars) == 1:
for v in vars:
if v.genotype == "heterozygous":
gt = v.genotype_for_sample(patient.patient_id)
if gt == Genotype.HETEROZYGOUS:
return HETEROZYGOUS
elif v.genotype == "homozygous":
elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
return HOMOZYGOUS
else:
# TODO - is this really what we want to return here?
return HETEROZYGOUS
elif len(vars) > 1:
return HOMOZYGOUS
Expand Down
Loading