Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop strategy for handling errors in the input data #127

Closed
wants to merge 12 commits into from
4 changes: 2 additions & 2 deletions docs/user-guide/input-data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Here we walk the file system, load all phenopacket JSON files, and transform the
... pp_path = os.path.join(dirpath, filename)
... with open(pp_path) as fh:
... pp = Parse(fh.read(), Phenopacket())
... patient = patient_creator.create_patient(pp)
... patients.append(patient)
... output = patient_creator.process(pp)
... patients.append(output.outcome)


>>> f'Loaded {len(patients)} phenopackets'
Expand Down
52 changes: 26 additions & 26 deletions src/genophenocorr/data/_toy.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,132 +77,132 @@ def get_toy_cohort() -> Cohort:
Genotypes.empty()) # Not used in the patients below, hence `empty()`.

patients = (
Patient('A',
Patient(SampleLabels('A'),
phenotypes=(arachnodactyly_T, spasticity_F, seizure_T),
variants=[snv],
proteins=[prot]
),
Patient('B',
Patient(SampleLabels('B'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_T),
variants=[snv],
proteins=[prot]
),
Patient('C',
Patient(SampleLabels('C'),
phenotypes=(arachnodactyly_F, spasticity_T, seizure_T),
variants=[snv],
proteins=[prot]
),
Patient('D',
Patient(SampleLabels('D'),
phenotypes=(arachnodactyly_T, spasticity_T, seizure_T),
variants=[snv, deletion],
proteins=[prot]
),
Patient('E',
Patient(SampleLabels('E'),
phenotypes=(arachnodactyly_T, spasticity_T, seizure_F),
variants=[snv],
proteins=[prot]
),
Patient('F',
Patient(SampleLabels('F'),
phenotypes=(arachnodactyly_F, spasticity_F, seizure_T),
variants=[deletion],
proteins=[prot]
),
Patient('G',
Patient(SampleLabels('G'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_T),
variants=[snv, deletion],
proteins=[prot]
),
Patient('H',
Patient(SampleLabels('H'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_F),
variants=[deletion],
proteins=[prot]
),
Patient('I',
Patient(SampleLabels('I'),
phenotypes=(arachnodactyly_F, spasticity_F, seizure_T),
variants=[deletion],
proteins=[prot]
),
Patient('J',
Patient(SampleLabels('J'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_T),
variants=[snv],
proteins=[prot]
),
Patient('K',
Patient(SampleLabels('K'),
phenotypes=(arachnodactyly_F, spasticity_T, seizure_T),
variants=[snv],
proteins=[prot]
),
Patient('L',
Patient(SampleLabels('L'),
phenotypes=(arachnodactyly_F, seizure_F, spasticity_F),
variants=[deletion],
proteins=[prot]
),
Patient('M',
Patient(SampleLabels('M'),
phenotypes=(arachnodactyly_T, seizure_F, spasticity_T),
variants=[snv],
proteins=[prot]
),
Patient('N',
Patient(SampleLabels('N'),
phenotypes=(arachnodactyly_F, seizure_T, spasticity_F),
variants=[snv],
proteins=[prot]
),
Patient('O',
Patient(SampleLabels('O'),
phenotypes=(arachnodactyly_F, seizure_F, spasticity_T),
variants=[deletion],
proteins=[prot]
),
Patient('P',
Patient(SampleLabels('P'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_F),
variants=[snv],
proteins=[prot]
),
Patient('Q',
Patient(SampleLabels('Q'),
phenotypes=(arachnodactyly_T, seizure_F, spasticity_F),
variants=[snv],
proteins=[prot]
),
Patient('R',
Patient(SampleLabels('R'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_F),
variants=[snv, deletion],
proteins=[prot]
),
Patient('S',
Patient(SampleLabels('S'),
phenotypes=(arachnodactyly_F, seizure_T, spasticity_T),
variants=[deletion],
proteins=[prot]
),
Patient('T',
Patient(SampleLabels('T'),
phenotypes=(arachnodactyly_T, seizure_F, spasticity_T),
variants=[snv],
proteins=[prot]
),
Patient('U',
Patient(SampleLabels('U'),
phenotypes=(arachnodactyly_F, seizure_T, spasticity_T),
variants=[deletion],
proteins=[prot]
),
Patient('V',
Patient(SampleLabels('V'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_T),
variants=[snv],
proteins=[prot]
),
Patient('W',
Patient(SampleLabels('W'),
phenotypes=(arachnodactyly_F, seizure_T, spasticity_T),
variants=[deletion],
proteins=[prot]
),
Patient('X',
Patient(SampleLabels('X'),
phenotypes=(arachnodactyly_F, seizure_T, spasticity_T),
variants=[deletion],
proteins=[prot]
),
Patient('Y',
Patient(SampleLabels('Y'),
phenotypes=(arachnodactyly_T, seizure_T, spasticity_T),
variants=[snv],
proteins=[prot]
),
Patient('Z',
Patient(SampleLabels('Z'),
phenotypes=(arachnodactyly_F, seizure_T, spasticity_T),
variants=[deletion],
proteins=[prot]
Expand Down
4 changes: 2 additions & 2 deletions src/genophenocorr/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
and protein info.
"""
from . import genome

from ._base import SampleLabels
from ._cohort import Cohort, Patient
from ._gt import Genotype, Genotypes, Genotyped
from ._phenotype import Phenotype
Expand All @@ -14,7 +14,7 @@
from ._variant_effects import VariantEffect

__all__ = [
'Cohort', 'Patient',
'Cohort', 'Patient', 'SampleLabels',
'Phenotype',
'Variant', 'VariantCoordinates', 'Genotype', 'Genotypes', 'Genotyped',
'TranscriptAnnotation', 'VariantEffect', 'TranscriptInfoAware', 'TranscriptCoordinates',
Expand Down
60 changes: 60 additions & 0 deletions src/genophenocorr/model/_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import typing

import hpotk


class SampleLabels:
    """
    A data model for subject identifiers.

    The subject has a mandatory :attr:`label` and an optional :attr:`meta_label`.

    The identifiers support natural ordering (``<``, ``<=``, ``>``, ``>=``), equality tests, and are hashable.
    The ordering is by :attr:`label` first and by :attr:`meta_label` second,
    where a missing `meta_label` sorts before any present `meta_label`.
    """

    def __init__(self, label: str,
                 meta_label: typing.Optional[str] = None):
        """
        Create the identifiers.

        :param label: the mandatory subject label.
        :param meta_label: the optional meta label or `None` if not available.
        """
        # NOTE(review): the `hpotk` validators presumably raise if the type does not match - confirm against hpotk.
        self._label = hpotk.util.validate_instance(label, str, 'label')
        self._meta_label = hpotk.util.validate_optional_instance(meta_label, str, 'meta_label')

    @property
    def label(self) -> str:
        """
        Get the mandatory subject label.
        """
        return self._label

    @property
    def meta_label(self) -> typing.Optional[str]:
        """
        Get the meta label or `None` if the meta label was not provided.
        """
        return self._meta_label

    def label_summary(self) -> str:
        """
        Summarize `label` and `meta_label` into a `str` where the sub-parts are inserted as ``<label>[<meta_label>]``.

        Only ``<label>`` is returned if `meta_label` is `None`.
        """
        return self._label if self._meta_label is None else f'{self._label}[{self._meta_label}]'

    def _sort_key(self) -> typing.Tuple[str, bool, str]:
        """
        Get a tuple that orders the identifiers by `label` and then by `meta_label`.

        The `bool` in the middle makes a missing `meta_label` (`False`) sort
        before any present `meta_label` (`True`), including an empty `str`.
        """
        has_meta = self._meta_label is not None
        return self._label, has_meta, self._meta_label if has_meta else ''

    def __eq__(self, other):
        if not isinstance(other, SampleLabels):
            # Let Python try the reflected operation; the final outcome for foreign types is still `False`.
            return NotImplemented
        return self._label == other._label and self._meta_label == other._meta_label

    def __lt__(self, other):
        if not isinstance(other, SampleLabels):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        if not isinstance(other, SampleLabels):
            return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other):
        if not isinstance(other, SampleLabels):
            return NotImplemented
        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        if not isinstance(other, SampleLabels):
            return NotImplemented
        return self._sort_key() >= other._sort_key()

    def __hash__(self):
        # Hash the same fields that `__eq__` compares to keep the two consistent.
        return hash((self._label, self._meta_label))

    def __str__(self):
        return self.label_summary()

    def __repr__(self):
        return f'SampleLabels(label={self._label}, meta_label={self._meta_label})'
20 changes: 14 additions & 6 deletions src/genophenocorr/model/_cohort.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import typing
from collections import Counter

from ._base import SampleLabels
from ._phenotype import Phenotype
from ._protein import ProteinMetadata
from ._variant import Variant
Expand All @@ -10,25 +11,25 @@ class Patient:
"""A class that represents an individual patient

Attributes:
patient_id (string): A string unique to this Patient object
patient_id (SampleLabels): The patient identifiers
phenotypes (Sequence[Phenotype]): A list of Phenotype objects
variants (Sequence[Variant]): A list of Variant objects
proteins (Sequence[ProteinMetadata]): A list of ProteinMetadata objects
"""

def __init__(self, patient_id: str,
def __init__(self, labels: SampleLabels,
phenotypes: typing.Iterable[Phenotype],
variants: typing.Iterable[Variant],
proteins: typing.Iterable[ProteinMetadata]):
"""Constructs all necessary attributes for a Patient object

Args:
patient_id (string): A string unique to this Patient object
labels (SampleLabels): The patient identifiers
phenotypes (Iterable[Phenotype]): A list of Phenotype objects
variants (Iterable[Variant]): A list of Variant objects
proteins (Iterable[ProteinMetadata]): A list of ProteinMetadata objects
"""
self._id = patient_id
self._labels = labels
self._phenotypes = tuple(phenotypes)
self._variants = tuple(variants)
self._proteins = tuple(proteins)
Expand All @@ -39,7 +40,14 @@ def patient_id(self) -> str:
Returns:
string: Patient ID unique to this Patient object
"""
return self._id
return self._labels.label_summary()

@property
def labels(self) -> SampleLabels:
"""
Get the sample identifiers.
"""
return self._labels

@property
def phenotypes(self) -> typing.Sequence[Phenotype]:
Expand Down Expand Up @@ -79,7 +87,7 @@ def excluded_phenotypes(self) -> typing.Iterator[Phenotype]:

def __str__(self) -> str:
return (f"Patient("
f"patient_id:{self.patient_id}, "
f"labels:{self._labels}, "
f"variants:{self.variants}, "
f"phenotypes:{[pheno.identifier for pheno in self.phenotypes]}, "
f"proteins:{[prot.protein_id for prot in self.proteins]})")
Expand Down
Loading