monarch-initiative · ielis · Oct 3, 2023 · Sep 29, 2023 · Sep 29, 2023 · Oct 2, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+# Cache with transcript/protein pickle files
+.genophenocorr_cache
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/docs/conf.py b/docs/conf.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import doctest
 import os
 import sys
 
@@ -175,13 +176,17 @@
 doctest_path = [genophenocorr_src]
 doctest_test_doctest_blocks = ""
 
-# Nothing special here
 doctest_global_setup = """
 # For printing data frames "as is".
 import pandas as pd
 pd.set_option('expand_frame_repr', False)
 """
 
+doctest_default_flags = (doctest.REPORT_ONLY_FIRST_FAILURE
+                         | doctest.ELLIPSIS
+                         | doctest.IGNORE_EXCEPTION_DETAIL
+                         | doctest.DONT_ACCEPT_TRUE_FOR_1)
+
 # -- Intersphinx setup --------------------------------------------------------
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -46,12 +46,12 @@ We can then view the data using the list commands.
   >>> sorted(cohort.list_all_phenotypes())
   [('HP:0001166', 14), ('HP:0001250', 20), ('HP:0001257', 17)]
   >>> sorted(cohort.list_all_variants())
-  [('HetVar1', 13), ('HetVar2', 11), ('HomVar1', 3), ('HomVar2', 2)]
+  [('1_281_A/G', 16), ('1_361_TTC/T', 13)]
   >>> sorted(cohort.list_all_proteins())
   [('NP_09876.5', 26)]
   >>> tx_dict = cohort.list_data_by_tx('NM_1234.5')
   >>> sorted(tx_dict['NM_1234.5'].items())
-  [('frameshift_variant', 2), ('missense_variant', 2)]
+  [('frameshift_variant', 1), ('missense_variant', 1)]
 
 Using the counts, we can choose and run what analyses we want.
 For instance, we can partition the patients into two groups based on presence/absence of a *frameshift* variant:

diff --git a/docs/user-guide/input-data.rst b/docs/user-guide/input-data.rst
@@ -58,7 +58,8 @@ For the purpose of this example, we will use a folder `simple_cohort` with 5 exa
 
 .. doctest:: input-data
 
-  >>> simple_cohort_path = 'data/simple_cohort'
+  >>> import os
+  >>> simple_cohort_path = os.path.join(os.getcwd(), 'data', 'simple_cohort')
 
 Here we walk the file system, load all phenopacket JSON files, and transform the phenopackets into instances of
 :class:`genophenocorr.model.Patient`:
@@ -76,8 +77,8 @@ Here we walk the file system, load all phenopacket JSON files, and transform the
   ...       pp_path = os.path.join(dirpath, filename)
   ...       with open(pp_path) as fh:
   ...         pp = Parse(fh.read(), Phenopacket())
-  ...     patient = patient_creator.create_patient(pp)
-  ...     patients.append(patient)
+  ...       patient = patient_creator.create_patient(pp)
+  ...       patients.append(patient)
 
 
   >>> f'Loaded {len(patients)} phenopackets'

diff --git a/src/genophenocorr/analysis/predicate/_all_predicates.py b/src/genophenocorr/analysis/predicate/_all_predicates.py
@@ -3,7 +3,7 @@
 import hpotk
 
 from genophenocorr.constants import VariantEffect
-from genophenocorr.model import Patient, FeatureType
+from genophenocorr.model import Patient, FeatureType, Genotype
 from ._api import PolyPredicate, PatientCategory
 
 
@@ -95,7 +95,7 @@ def __init__(self, transcript:str) -> None:
     def categories(self) -> typing.Sequence[PatientCategory]:
         return HETEROZYGOUS, HOMOZYGOUS, NO_VARIANT
 
-    def test(self, patient: Patient, query:VariantEffect) -> typing.Optional[PatientCategory]:
+    def test(self, patient: Patient, query: VariantEffect) -> typing.Optional[PatientCategory]:
         if not isinstance(patient, Patient):
             raise ValueError(f"patient must be type Patient but was type {type(patient)}")
         if not isinstance(query, VariantEffect):
@@ -109,11 +109,13 @@ def test(self, patient: Patient, query:VariantEffect) -> typing.Optional[Patient
                             vars.add(var)
         if len(vars) == 1:
             for v in vars:
-                if v.genotype == "heterozygous":
+                gt = v.genotype_for_sample(patient.patient_id)
+                if gt == Genotype.HETEROZYGOUS:
                     return HETEROZYGOUS
-                elif v.genotype == "homozygous":
+                elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
                     return HOMOZYGOUS
                 else:
+                    # TODO - gt is neither HETEROZYGOUS nor HOMOZYGOUS_ALTERNATE here; is HETEROZYGOUS really the right category to return?
                     return HETEROZYGOUS
         elif len(vars) > 1:
             return HOMOZYGOUS
@@ -135,16 +137,17 @@ def test(self, patient: Patient, query: str) -> typing.Optional[PatientCategory]
             raise ValueError(f"query must be type string but was type {type(query)}")
         vars = set()
         for var in patient.variants:
-            #print(f"{var.variant_string} == {query}")
             if var.variant_string == query:
                 vars.add(var)
         if len(vars) == 1:
             for v in vars:
-                if v.genotype == "heterozygous":
+                gt = v.genotype_for_sample(patient.patient_id)
+                if gt == Genotype.HETEROZYGOUS:
                     return HETEROZYGOUS
-                elif v.genotype == "homozygous":
+                elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
                     return HOMOZYGOUS
                 else:
+                    # TODO - gt is neither HETEROZYGOUS nor HOMOZYGOUS_ALTERNATE here; is HETEROZYGOUS really the right category to return?
                     return HETEROZYGOUS
         elif len(vars) > 1:
             return HOMOZYGOUS
@@ -173,11 +176,13 @@ def test(self, patient: Patient, query: int) -> typing.Optional[PatientCategory]
                             vars.add(var)
         if len(vars) == 1:
             for v in vars:
-                if v.genotype == "heterozygous":
+                gt = v.genotype_for_sample(patient.patient_id)
+                if gt == Genotype.HETEROZYGOUS:
                     return HETEROZYGOUS
-                elif v.genotype == "homozygous":
+                elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
                     return HOMOZYGOUS
                 else:
+                    # TODO - gt is neither HETEROZYGOUS nor HOMOZYGOUS_ALTERNATE here; is HETEROZYGOUS really the right category to return?
                     return HETEROZYGOUS
         elif len(vars) > 1:
             return HOMOZYGOUS
@@ -209,11 +214,13 @@ def test(self, patient: Patient, query:FeatureType) -> typing.Optional[PatientCa
                                         vars.add(var)
         if len(vars) == 1:
             for v in vars:
-                if v.genotype == "heterozygous":
+                gt = v.genotype_for_sample(patient.patient_id)
+                if gt == Genotype.HETEROZYGOUS:
                     return HETEROZYGOUS
-                elif v.genotype == "homozygous":
+                elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
                     return HOMOZYGOUS
                 else:
+                    # TODO - gt is neither HETEROZYGOUS nor HOMOZYGOUS_ALTERNATE here; is HETEROZYGOUS really the right category to return?
                     return HETEROZYGOUS
         elif len(vars) > 1:
             return HOMOZYGOUS
@@ -251,11 +258,13 @@ def test(self, patient: Patient, query: str) -> typing.Optional[PatientCategory]
                                         vars.add(var)
         if len(vars) == 1:
             for v in vars:
-                if v.genotype == "heterozygous":
+                gt = v.genotype_for_sample(patient.patient_id)
+                if gt == Genotype.HETEROZYGOUS:
                     return HETEROZYGOUS
-                elif v.genotype == "homozygous":
+                elif gt == Genotype.HOMOZYGOUS_ALTERNATE:
                     return HOMOZYGOUS
                 else:
+                    # TODO - gt is neither HETEROZYGOUS nor HOMOZYGOUS_ALTERNATE here; is HETEROZYGOUS really the right category to return?
                     return HETEROZYGOUS
         elif len(vars) > 1:
             return HOMOZYGOUS