From 064f8f85e32c54f5a40c67f5440cee5a530dab63 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 3 Oct 2024 13:29:17 +0200 Subject: [PATCH] adding new static measurement fxns --- docs/index.md | 2 +- docs/user-guide/excel.md | 2 +- docs/user-guide/variant_notation.md | 2 +- src/pyphetools/__init__.py | 2 +- src/pyphetools/creation/measurements.py | 30 +++++++++++++-- src/pyphetools/creation/variant_manager.py | 37 +++++++++++++------ .../visualization/phenopacket_table.py | 10 +++++ 7 files changed, 67 insertions(+), 18 deletions(-) diff --git a/docs/index.md b/docs/index.md index a007e5ca..ea3f4d15 100644 --- a/docs/index.md +++ b/docs/index.md @@ -10,7 +10,7 @@ from tabular data such as databases or supplemental files found in the medical l This documentation contains information about - How to use the [Excel template](user-guide/excel.md) to code clinical data -- How to use [pyphetools classes](user-guide/jupyter.md) to convert tabular data (e.g., supplemental tables) to phenopackets +- How to use [pyphetools classes](tabular/jupyter.md) to convert tabular data (e.g., supplemental tables) to phenopackets - Information for [developers](developers/developers.md) - A description of the pyphetools [API](api/overview.md) diff --git a/docs/user-guide/excel.md b/docs/user-guide/excel.md index cd597b27..1fe3d619 100644 --- a/docs/user-guide/excel.md +++ b/docs/user-guide/excel.md @@ -2,7 +2,7 @@ We have designed a format for Excel templates that can be used to quickly and efficiently generate collections of Phenopackets. This is currently the prefered way for clinicians and translational researchers to contribute to this project. The [pyphetools](https://github.com/monarch-initiative/pyphetools){:target="_blank"} library provides other means for bioinformaticians (please ask us). -The template can be downloaded [here](../_static/template.xlsx){:target="_blank"}. +The template file is generated for each disease as described in [template](template.md). 
# A format for cohort descriptions in excel diff --git a/docs/user-guide/variant_notation.md b/docs/user-guide/variant_notation.md index 85a5e62a..8c5f3141 100644 --- a/docs/user-guide/variant_notation.md +++ b/docs/user-guide/variant_notation.md @@ -1,6 +1,6 @@ # Variant Notation -We recommend that users choose one transcript for all HGVS variant descriptions in a project. In general, the most clinicallz relevant transcript should be chosen. +We recommend that users choose one transcript for all HGVS variant descriptions in a project. In general, the most clinically relevant transcript should be chosen. ### Choosing the reference transcript for a project diff --git a/src/pyphetools/__init__.py b/src/pyphetools/__init__.py index 1ddcd58c..65bd31d7 100644 --- a/src/pyphetools/__init__.py +++ b/src/pyphetools/__init__.py @@ -5,7 +5,7 @@ from . import validation -__version__ = "0.9.107" +__version__ = "0.9.108" __all__ = [ diff --git a/src/pyphetools/creation/measurements.py b/src/pyphetools/creation/measurements.py index 7836109e..f0440678 100644 --- a/src/pyphetools/creation/measurements.py +++ b/src/pyphetools/creation/measurements.py @@ -11,16 +11,19 @@ pg_per_l = OntologyClass202(id="UCUM:pg/L", label="picogram per liter") pg_per_ml = OntologyClass202(id="UCUM:pg/mL", label="picogram per milliliter") nmol_per_l = OntologyClass202(id="UCUM:nmol/L", label="nanomole per liter") +mmol_per_l= OntologyClass202(id="UCUM:mmol/L", label="millimole per liter") +percent = OntologyClass202(id="UCUM:%", label="percent") class Measurements: - - + """ + Convenience class with static methods to create Measurement objects for common units. 
+ """ @staticmethod def _with_reference_range(assay: OntologyClass202, - unit: OntologyClass202, + unit: OntologyClass202, value: float, low: float, high: float) -> Measurement202: @@ -93,6 +96,27 @@ def nanomole_per_liter(code: str, high: float = None) -> Measurement202: assay = OntologyClass202(id=code, label=label) return Measurements._from_assay_and_values(assay=assay, unit=nmol_per_l, value=concentration, low=low, high=high) + + @staticmethod + def millimole_per_liter(code: str, + label: str, + concentration: float, + low: float = None, + high: float = None) -> Measurement202: + assay = OntologyClass202(id=code, label=label) + return Measurements._from_assay_and_values(assay=assay, unit=mmol_per_l, value=concentration, low=low, high=high) + + @staticmethod + def percent(code: str, + label: str, + concentration: float, + low: float = None, + high: float = None) -> Measurement202: + assay = OntologyClass202(id=code, label=label) + return Measurements._from_assay_and_values(assay=assay, unit=percent, value=concentration, low=low, high=high) + + + diff --git a/src/pyphetools/creation/variant_manager.py b/src/pyphetools/creation/variant_manager.py index 7ac7558b..7c27843c 100644 --- a/src/pyphetools/creation/variant_manager.py +++ b/src/pyphetools/creation/variant_manager.py @@ -1,7 +1,7 @@ import os import pickle import pandas as pd -from typing import List, Dict +from typing import List from collections import defaultdict from .individual import Individual from .variant_validator import VariantValidator @@ -48,6 +48,21 @@ class VariantManager: shows the other variants. These can be use to create chromosomal deletions, duplications, and inversions. Finally, the class can be used to add variants to a list of Individual objects. + If the Excel template is used, this class will be called internally and users do not need to use the code. If the + data is ingested manually, the class can be used as follows. 
+ + gnas_symbol = "GNAS" + gnas_id = "HGNC:4392" + gnas_MANE_transcript = "NM_000516.7" + vmanager = VariantManager(df=df, + individual_column_name="individual", + transcript=gnas_MANE_transcript, + gene_id=gnas_id, + gene_symbol=gnas_symbol, + allele_1_column_name="allele_1") + + See [variant_manager](https://monarch-initiative.github.io/pyphetools/api/creation/variant_manager/) for more information. + :param df: DataFrame representing the input data :type df: pd.DataFrame :param individual_column_name: Name of the individual (patient) column @@ -99,13 +114,13 @@ def __init__(self, self._create_variant_d(overwrite) - def _format_pmid_id(self, identifier, pmid): + def _format_pmid_id(self, identifier, pmid) -> str: if pmid is not None: return f"{pmid}_{identifier}" else: return identifier - def _get_identifier_with_pmid(self, row:pd.Series): + def _get_identifier_with_pmid(self, row:pd.Series) -> str: """Get an identifier such as PMID_33087723_A2 for a daa row with PMID:33087723 and identifier within that publication A2 Identifiers such as P1 are commonly used and there is a risk of a clash with collections of phenopackets from various papers. @@ -118,7 +133,7 @@ def _get_identifier_with_pmid(self, row:pd.Series): else: return individual_id - def _create_variant_d(self, overwrite): + def _create_variant_d(self, overwrite) -> None: """ Creates a dictionary with all HGVS variants, and as a side effect creates a set with variants that are not HGVS and need to be mapped manually. This method has the following effects @@ -182,9 +197,9 @@ def _create_variant_d(self, overwrite): self._unmapped_alleles.add(v) # This allows us to use the chromosomal mappers. 
write_variant_pickle(name=self._gene_symbol, my_object=self._var_d) - def code_as_chromosomal_deletion(self, allele_set): + def code_as_chromosomal_deletion(self, allele_set) -> None: """ - Code as Structural variants - chromosomal deletion (to be added to self._var_d) + Code variants with the identifiers in "allele_set" as Structural variants (chromosomal deletion) :param allele_set: Set of alleles (strings) for coding as Structural variants (chromosomal deletion) """ # first check that all of the alleles are in self._unmapped_alleles @@ -200,9 +215,9 @@ def code_as_chromosomal_deletion(self, allele_set): self._unmapped_alleles.remove(allele) self._var_d[allele] = var - def code_as_chromosomal_duplication(self, allele_set): + def code_as_chromosomal_duplication(self, allele_set) -> None: """ - Code as Structural variants - chromosomal duplication (to be added to self._var_d) + Code variants with the identifiers in "allele_set" as Structural variants (chromosomal duplication) :param allele_set: Set of alleles (strings) for coding as Structural variants (chromosomal duplication) """ # first check that all of the alleles are in self._unmapped_alleles @@ -217,7 +232,7 @@ def code_as_chromosomal_duplication(self, allele_set): def code_as_chromosomal_inversion(self, allele_set) -> None: """ - Code as Structural variants - chromosomal inversion (to be added to self._var_d) + Code variants with the identifiers in "allele_set" as Structural variants (chromosomal inversion) :param allele_set: Set of alleles (strings) for coding as Structural variants (chromosomal inversion) """ # first check that all of the alleles are in self._unmapped_alleles @@ -232,8 +247,8 @@ def code_as_chromosomal_inversion(self, allele_set) -> None: def code_as_chromosomal_translocation(self, allele_set) -> None: """ - Code as Structural variants - chromosomal translocation (to be added to self._var_d) - :param allele_set: Set of alleles (strings) for coding as Structural variants (chromosomal 
inversion) + Code variants with the identifiers in "allele_set" as Structural variants (chromosomal translocation) + :param allele_set: Set of alleles (strings) for coding as Structural variants (chromosomal translocation) """ # first check that all of the alleles are in self._unmapped_alleles if not allele_set.issubset(self._unmapped_alleles): diff --git a/src/pyphetools/visualization/phenopacket_table.py b/src/pyphetools/visualization/phenopacket_table.py index ec13894b..01f5166c 100644 --- a/src/pyphetools/visualization/phenopacket_table.py +++ b/src/pyphetools/visualization/phenopacket_table.py @@ -7,6 +7,7 @@ from ..creation import Individual, HpTerm, MetaData from .simple_patient import SimplePatient from .html_table_generator import HtmlTableGenerator +from ..pp.v202._base import TimeElement as TimeElement202 # @@ -24,6 +25,15 @@ def __init__(self, age, days) -> None: self.key = age self.days = days + def __repr__(self): + """ + self.key can be either a TimeElement or a simple string. + """ + if isinstance(self.key, TimeElement202): + return self.key.display_time_element() + else: + return self.key + #@DeprecationWarning("This class will be replaced by IndividualTable and will be deleted in a future version") class PhenopacketTable: