Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix change length bug #94

Merged
merged 6 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,22 @@ The *latest* release can be installed by cloning the GitHub repository::
The code above will clone the source code from GitHub repository, switch to the `develop` branch with the latest features,
and install the library into the current Python (virtual) environment.


Run tests
^^^^^^^^^

Running tests can be done as an optional step after installation. However, some additional
libraries are required to run tests, hence we must do one more install, this time with `test` option enabled::

python3 -m pip install .[test]

Then, running the tests is as simple as::

pytest

This will run the unit and integration tests that do *not* require internet access. To run the "online" tests,
we add the ``--runonline`` option to the command line invocation::

pytest --runonline

That's all about testing!
22 changes: 11 additions & 11 deletions src/genophenocorr/analysis/predicate/_all_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ class HPOPresentPredicate(PolyPredicate[hpotk.TermId]):
name='Observed',
description="""
The sample *is* annotated with the tested phenotype feature `q`.

This is either because the sample is annotated with `q` (exact match),
or because one of sample's annotations is a descendant `q` (annotation propagation).
For instance, we tested for a Seizure and the sample *had* a Clonic seizure
For instance, we tested for a Seizure and the sample *had* a Clonic seizure
(a descendant of Seizure).
""")

NOT_OBSERVED = PatientCategory(cat_id=1,
name='Not observed',
description="""
We are particular about the sample *not* having the tested feature `q`.

In other words, `q` was *excluded* in the sample or the sample is annotated with an excluded ancestor of `q`.
For instance, we tested for a Clonic seizure and the sample did *not* have any Seizure, which implies
*not* Clonic seizure.

For instance, we tested for a Clonic seizure and the sample did *not* have any Seizure, which implies
*not* Clonic seizure.
""")

NOT_MEASURED = PatientCategory(cat_id=2,
Expand All @@ -36,7 +36,7 @@ class HPOPresentPredicate(PolyPredicate[hpotk.TermId]):
We do not know if the sample has or has not the tested feature.
""")

def __init__(self,
def __init__(self,
hpo: hpotk.MinimalOntology) -> None:
self._hpo = hpotk.util.validate_instance(hpo, hpotk.MinimalOntology, 'hpo')

Expand Down Expand Up @@ -69,13 +69,13 @@ def test(self, patient: Patient, query: hpotk.TermId) -> typing.Optional[Patient
HETEROZYGOUS = PatientCategory(cat_id=0,
name='Heterozygous',
description="""
This sample has the tested attribute on one allele.
This sample has the tested attribute on one allele.
""")

HOMOZYGOUS = PatientCategory(cat_id=1,
name='Homozygous',
description="""
This sample has the tested attribute on both alleles.
This sample has the tested attribute on both alleles.
""")

NO_VARIANT = PatientCategory(cat_id=2,
Expand All @@ -87,7 +87,7 @@ def test(self, patient: Patient, query: hpotk.TermId) -> typing.Optional[Patient


class VariantEffectPredicate(PolyPredicate[VariantEffect]):

def __init__(self, transcript:str) -> None:
self._transcript = transcript

Expand Down Expand Up @@ -268,4 +268,4 @@ def test(self, patient: Patient, query: str) -> typing.Optional[PatientCategory]
elif len(vars) > 1:
return HOMOZYGOUS
else:
return NO_VARIANT
return NO_VARIANT
8 changes: 4 additions & 4 deletions src/genophenocorr/preprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ._api import VariantCoordinateFinder, FunctionalAnnotator, ProteinMetadataService
from ._config import configure_caching_patient_creator
from ._config import configure_caching_patient_creator, configure_patient_creator
from ._patient import PatientCreator
from ._phenopacket import PhenopacketVariantCoordinateFinder, PhenopacketPatientCreator, load_phenopacket_folder
from ._phenopacket import PhenopacketVariantCoordinateFinder, PhenopacketPatientCreator, load_phenopacket_folder, load_phenopacket
from ._phenotype import PhenotypeCreator, PhenotypeValidationException
from ._protein import ProteinAnnotationCache, ProtCachingFunctionalAnnotator
from ._uniprot import UniprotProteinMetadataService
Expand All @@ -11,10 +11,10 @@
__all__ = [
'VariantCoordinateFinder', 'FunctionalAnnotator', 'ProteinMetadataService',
'PatientCreator',
'PhenopacketVariantCoordinateFinder', 'PhenopacketPatientCreator', 'load_phenopacket_folder',
'PhenopacketVariantCoordinateFinder', 'PhenopacketPatientCreator', 'load_phenopacket_folder', 'load_phenopacket',
'PhenotypeCreator', 'PhenotypeValidationException',
'ProteinAnnotationCache', 'ProtCachingFunctionalAnnotator',
'UniprotProteinMetadataService',
'VepFunctionalAnnotator', 'VariantAnnotationCache', 'VarCachingFunctionalAnnotator',
'configure_caching_patient_creator'
'configure_caching_patient_creator', 'configure_patient_creator'
]
81 changes: 63 additions & 18 deletions src/genophenocorr/preprocessing/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@

import hpotk

from genophenocorr.model.genome import GRCh37, GRCh38
from genophenocorr.model.genome import GRCh37, GRCh38, GenomeBuild

from ._api import FunctionalAnnotator, ProteinMetadataService
from ._phenotype import PhenotypeCreator
from ._phenopacket import PhenopacketPatientCreator
from ._api import FunctionalAnnotator
from ._protein import ProteinAnnotationCache, ProtCachingFunctionalAnnotator
from ._uniprot import UniprotProteinMetadataService
from ._variant import VarCachingFunctionalAnnotator, VariantAnnotationCache
from ._vep import VepFunctionalAnnotator
from ._vv import VVHgvsVariantCoordinateFinder


def configure_caching_patient_creator(hpo: hpotk.MinimalOntology,
Expand All @@ -23,7 +24,7 @@ def configure_caching_patient_creator(hpo: hpotk.MinimalOntology,
"""
A convenience function for configuring a caching :class:`genophenocorr.preprocessing.PhenopacketPatientCreator`.

To create the patient creator, we need hpo-toolkit's representation of HPO, the validator
To create the patient creator, we need hpo-toolkit's representation of HPO. Other options are optional.

:param hpo: a HPO instance.
:param genome_build: name of the genome build to use, choose from `{'GRCh37.p13', 'GRCh38.p13'}`.
Expand All @@ -39,17 +40,50 @@ def configure_caching_patient_creator(hpo: hpotk.MinimalOntology,
if cache_dir is None:
cache_dir = os.path.join(os.getcwd(), '.genophenocorr_cache')

build = _configure_build(genome_build)
phenotype_creator = _setup_phenotype_creator(hpo, validation_runner)
functional_annotator = _configure_functional_annotator(cache_dir, variant_fallback, protein_fallback)
hgvs_annotator = VVHgvsVariantCoordinateFinder(build)
return PhenopacketPatientCreator(build, phenotype_creator, functional_annotator, hgvs_annotator)


def configure_patient_creator(hpo: hpotk.MinimalOntology,
                              genome_build: str = 'GRCh38.p13',
                              validation_runner: typing.Optional[hpotk.validate.ValidationRunner] = None,
                              variant_fallback: str = 'VEP',
                              protein_fallback: str = 'UNIPROT') -> PhenopacketPatientCreator:
    """
    A convenience function for configuring a non-caching :class:`genophenocorr.preprocessing.PhenopacketPatientCreator`.

    To create the patient creator, we need hpo-toolkit's representation of HPO. Other options are optional.

    Unlike the caching variant, no cache directory is involved: the protein metadata service and the functional
    annotator selected by `protein_fallback` and `variant_fallback` are used directly.

    :param hpo: a HPO instance.
    :param genome_build: name of the genome build to use, choose from `{'GRCh37.p13', 'GRCh38.p13'}`.
    :param validation_runner: an optional instance of the validation runner.
    :param variant_fallback: the variant annotator to use.
     Choose from ``{'VEP'}`` (just one implementation is available at the moment).
    :param protein_fallback: the protein metadata annotator to use.
     Choose from ``{'UNIPROT'}`` (just one implementation is available at the moment).
    :returns: the configured patient creator.
    :raises ValueError: if `genome_build`, `variant_fallback`, or `protein_fallback` is not one of the supported values.
    """
    build = _configure_build(genome_build)

    phenotype_creator = _setup_phenotype_creator(hpo, validation_runner)
    # The protein service is wired into the functional annotator directly, with no caching layer in between.
    protein_metadata_service = _configure_fallback_protein_service(protein_fallback)
    functional_annotator = _configure_fallback_functional(protein_metadata_service, variant_fallback)
    hgvs_annotator = VVHgvsVariantCoordinateFinder(build)
    return PhenopacketPatientCreator(build, phenotype_creator, functional_annotator, hgvs_annotator)


def _configure_build(genome_build: str) -> GenomeBuild:
if genome_build == 'GRCh38.p13':
build = GRCh38
return GRCh38
elif genome_build == 'GRCh37.p13':
build = GRCh37
return GRCh37
else:
raise ValueError(f'Unknown build {genome_build}. Choose from [\'GRCh37.p13\', \'GRCh38.p13\']')

phenotype_creator = _setup_phenotype_creator(hpo, validation_runner)
functional_annotator = _configure_functional_annotator(cache_dir, variant_fallback, protein_fallback)
return PhenopacketPatientCreator(build, phenotype_creator, functional_annotator)


def _setup_phenotype_creator(hpo: hpotk.MinimalOntology,
validator: typing.Optional[hpotk.validate.ValidationRunner]) -> PhenotypeCreator:
Expand All @@ -70,23 +104,17 @@ def _configure_functional_annotator(cache_dir: str,
protein_fallback: str) -> FunctionalAnnotator:
# (1) ProteinMetadataService
# Setup fallback
if protein_fallback == 'UNIPROT':
fallback1 = UniprotProteinMetadataService()
else:
raise ValueError(f'Unknown protein fallback annotator type {protein_fallback}')
protein_fallback = _configure_fallback_protein_service(protein_fallback)
# Setup protein metadata cache
prot_cache_dir = os.path.join(cache_dir, 'protein_cache')
os.makedirs(prot_cache_dir, exist_ok=True)
prot_cache = ProteinAnnotationCache(prot_cache_dir)
# Assemble the final protein metadata service
protein_metadata_service = ProtCachingFunctionalAnnotator(prot_cache, fallback1)
protein_metadata_service = ProtCachingFunctionalAnnotator(prot_cache, protein_fallback)

# (2) FunctionalAnnotator
# Setup fallback
if variant_fallback == 'VEP':
fallback = VepFunctionalAnnotator(protein_metadata_service)
else:
raise ValueError(f'Unknown variant fallback annotator type {variant_fallback}')
fallback = _configure_fallback_functional(protein_metadata_service, variant_fallback)

# Setup variant cache
var_cache_dir = os.path.join(cache_dir, 'variant_cache')
Expand All @@ -97,3 +125,20 @@ def _configure_functional_annotator(cache_dir: str,
return VarCachingFunctionalAnnotator(var_cache, fallback)


def _configure_fallback_protein_service(protein_fallback: str) -> ProteinMetadataService:
    """
    Create the protein metadata service selected by `protein_fallback`.

    :param protein_fallback: name of the service; only ``'UNIPROT'`` is recognized.
    :returns: a new :class:`UniprotProteinMetadataService` for ``'UNIPROT'``.
    :raises ValueError: if `protein_fallback` is any other value.
    """
    # Reject unsupported names up front so the happy path is a single return.
    if protein_fallback != 'UNIPROT':
        raise ValueError(f'Unknown protein fallback annotator type {protein_fallback}')
    return UniprotProteinMetadataService()


def _configure_fallback_functional(protein_metadata_service: ProteinMetadataService,
                                   variant_fallback: str) -> FunctionalAnnotator:
    """
    Create the functional annotator selected by `variant_fallback`.

    :param protein_metadata_service: the protein metadata service handed to the annotator.
    :param variant_fallback: name of the annotator; only ``'VEP'`` is recognized.
    :returns: a new :class:`VepFunctionalAnnotator` built on `protein_metadata_service` for ``'VEP'``.
    :raises ValueError: if `variant_fallback` is any other value.
    """
    # Reject unsupported names up front so the happy path is a single return.
    if variant_fallback != 'VEP':
        raise ValueError(f'Unknown variant fallback annotator type {variant_fallback}')
    return VepFunctionalAnnotator(protein_metadata_service)


Loading