diff --git a/docs/user-guide/input-data.rst b/docs/user-guide/input-data.rst index c5f2afabb..40e679bf5 100644 --- a/docs/user-guide/input-data.rst +++ b/docs/user-guide/input-data.rst @@ -77,8 +77,8 @@ Here we walk the file system, load all phenopacket JSON files, and transform the ... pp_path = os.path.join(dirpath, filename) ... with open(pp_path) as fh: ... pp = Parse(fh.read(), Phenopacket()) - ... patient = patient_creator.create_patient(pp) - ... patients.append(patient) + ... output = patient_creator.process(pp) + ... patients.append(output.outcome) >>> f'Loaded {len(patients)} phenopackets' diff --git a/src/genophenocorr/data/_toy.py b/src/genophenocorr/data/_toy.py index 7a918c1a4..76c90e35a 100644 --- a/src/genophenocorr/data/_toy.py +++ b/src/genophenocorr/data/_toy.py @@ -77,132 +77,132 @@ def get_toy_cohort() -> Cohort: Genotypes.empty()) # Not used in the patients below, hence `empty()`. patients = ( - Patient('A', + Patient(SampleLabels('A'), phenotypes=(arachnodactyly_T, spasticity_F, seizure_T), variants=[snv], proteins=[prot] ), - Patient('B', + Patient(SampleLabels('B'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_T), variants=[snv], proteins=[prot] ), - Patient('C', + Patient(SampleLabels('C'), phenotypes=(arachnodactyly_F, spasticity_T, seizure_T), variants=[snv], proteins=[prot] ), - Patient('D', + Patient(SampleLabels('D'), phenotypes=(arachnodactyly_T, spasticity_T, seizure_T), variants=[snv, deletion], proteins=[prot] ), - Patient('E', + Patient(SampleLabels('E'), phenotypes=(arachnodactyly_T, spasticity_T, seizure_F), variants=[snv], proteins=[prot] ), - Patient('F', + Patient(SampleLabels('F'), phenotypes=(arachnodactyly_F, spasticity_F, seizure_T), variants=[deletion], proteins=[prot] ), - Patient('G', + Patient(SampleLabels('G'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_T), variants=[snv, deletion], proteins=[prot] ), - Patient('H', + Patient(SampleLabels('H'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_F), variants=[deletion], proteins=[prot] ), - Patient('I', + Patient(SampleLabels('I'), phenotypes=(arachnodactyly_F, spasticity_F, seizure_T), variants=[deletion], proteins=[prot] ), - Patient('J', + Patient(SampleLabels('J'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_T), variants=[snv], proteins=[prot] ), - Patient('K', + Patient(SampleLabels('K'), phenotypes=(arachnodactyly_F, spasticity_T, seizure_T), variants=[snv], proteins=[prot] ), - Patient('L', + Patient(SampleLabels('L'), phenotypes=(arachnodactyly_F, seizure_F, spasticity_F), variants=[deletion], proteins=[prot] ), - Patient('M', + Patient(SampleLabels('M'), phenotypes=(arachnodactyly_T, seizure_F, spasticity_T), variants=[snv], proteins=[prot] ), - Patient('N', + Patient(SampleLabels('N'), phenotypes=(arachnodactyly_F, seizure_T, spasticity_F), variants=[snv], proteins=[prot] ), - Patient('O', + Patient(SampleLabels('O'), phenotypes=(arachnodactyly_F, seizure_F, spasticity_T), variants=[deletion], proteins=[prot] ), - Patient('P', + Patient(SampleLabels('P'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_F), variants=[snv], proteins=[prot] ), - Patient('Q', + Patient(SampleLabels('Q'), phenotypes=(arachnodactyly_T, seizure_F, spasticity_F), variants=[snv], proteins=[prot] ), - Patient('R', + Patient(SampleLabels('R'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_F), variants=[snv, deletion], proteins=[prot] ), - Patient('S', + Patient(SampleLabels('S'), phenotypes=(arachnodactyly_F, seizure_T, spasticity_T), variants=[deletion], proteins=[prot] ), - Patient('T', + Patient(SampleLabels('T'), phenotypes=(arachnodactyly_T, seizure_F, spasticity_T), variants=[snv], proteins=[prot] ), - Patient('U', + Patient(SampleLabels('U'), phenotypes=(arachnodactyly_F, seizure_T, spasticity_T), variants=[deletion], proteins=[prot] ), - Patient('V', + Patient(SampleLabels('V'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_T), variants=[snv], proteins=[prot] ), - Patient('W', + Patient(SampleLabels('W'), phenotypes=(arachnodactyly_F, seizure_T, spasticity_T), variants=[deletion], proteins=[prot] ), - Patient('X', + Patient(SampleLabels('X'), phenotypes=(arachnodactyly_F, seizure_T, spasticity_T), variants=[deletion], proteins=[prot] ), - Patient('Y', + Patient(SampleLabels('Y'), phenotypes=(arachnodactyly_T, seizure_T, spasticity_T), variants=[snv], proteins=[prot] ), - Patient('Z', + Patient(SampleLabels('Z'), phenotypes=(arachnodactyly_F, seizure_T, spasticity_T), variants=[deletion], proteins=[prot] diff --git a/src/genophenocorr/model/__init__.py b/src/genophenocorr/model/__init__.py index 6144a9423..c6c2568ab 100644 --- a/src/genophenocorr/model/__init__.py +++ b/src/genophenocorr/model/__init__.py @@ -4,7 +4,7 @@ and protein info. """ from . import genome - +from ._base import SampleLabels from ._cohort import Cohort, Patient from ._gt import Genotype, Genotypes, Genotyped from ._phenotype import Phenotype @@ -14,7 +14,7 @@ from ._variant_effects import VariantEffect __all__ = [ - 'Cohort', 'Patient', + 'Cohort', 'Patient', 'SampleLabels', 'Phenotype', 'Variant', 'VariantCoordinates', 'Genotype', 'Genotypes', 'Genotyped', 'TranscriptAnnotation', 'VariantEffect', 'TranscriptInfoAware', 'TranscriptCoordinates', diff --git a/src/genophenocorr/model/_base.py b/src/genophenocorr/model/_base.py new file mode 100644 index 000000000..36716831e --- /dev/null +++ b/src/genophenocorr/model/_base.py @@ -0,0 +1,60 @@ +import typing + +import hpotk + + +class SampleLabels: + """ + A data model for subject identifiers. + + The subject has a mandatory :attr:`label` and an optional :attr:`meta_label`. + + The identifiers support natural ordering, equality tests, and are hashable. + """ + + def __init__(self, label: str, + meta_label: typing.Optional[str] = None): + self._label = hpotk.util.validate_instance(label, str, 'label') + self._meta_label = hpotk.util.validate_optional_instance(meta_label, str, 'meta_label') + + @property + def label(self) -> str: + return self._label + + @property + def meta_label(self) -> typing.Optional[str]: + return self._meta_label + + def label_summary(self) -> str: + """ + Summarize `label` and `meta_label` into a `str` where the sub-parts are inserted as ``