diff --git a/src/pheval_exomiser/post_process/post_process_results_format.py b/src/pheval_exomiser/post_process/post_process_results_format.py index 31f4191..6fb1bc0 100644 --- a/src/pheval_exomiser/post_process/post_process_results_format.py +++ b/src/pheval_exomiser/post_process/post_process_results_format.py @@ -1,5 +1,8 @@ #!/usr/bin/python +import copy import json +import uuid +from enum import Enum from pathlib import Path import click @@ -12,6 +15,21 @@ from pheval.utils.file_utils import files_with_suffix +class ModeOfInheritance(Enum): + """Enumeration representing mode of inheritance.""" + + AUTOSOMAL_DOMINANT = 1 + """Autosomal dominant mode of inheritance.""" + AUTOSOMAL_RECESSIVE = 2 + """Autosomal recessive mode of inheritance.""" + X_DOMINANT = 1 + """X dominant mode of inheritance.""" + X_RECESSIVE = 2 + """X recessive mode of inheritance.""" + MITOCHONDRIAL = 3 + """Mitochondrial mode of inheritance.""" + + def read_exomiser_json_result(exomiser_result_path: Path) -> dict: """Load Exomiser json result.""" with open(exomiser_result_path) as exomiser_json_result: @@ -65,6 +83,7 @@ class PhEvalVariantResultFromExomiserJsonCreator: def __init__(self, exomiser_json_result: [dict], score_name: str): self.exomiser_json_result = exomiser_json_result self.score_name = score_name + self.variant_grouping = {} @staticmethod def _find_chromosome(result_entry: dict) -> str: @@ -94,16 +113,28 @@ def _find_alt(result_entry: dict) -> str: else: return "" - def _find_relevant_score(self, result_entry) -> float: + def _find_relevant_score(self, result_entry: dict) -> float: """Return score from Exomiser result entry.""" return round(result_entry[self.score_name], 4) + @staticmethod + def _find_gene_symbol(result_entry: dict) -> str: + """Return gene symbol from Exomiser result entry.""" + return result_entry["geneSymbol"] + + @staticmethod + def _find_mode_of_inheritance(result_entry: dict) -> str: + """Return mode of inheritance from Exomiser result entry.""" + return result_entry["modeOfInheritance"] + def _filter_for_acmg_assignments( self, variant: PhEvalVariantResult, score: float, variant_acmg_assignments: dict ) -> bool: """Filter variants if they meet the PATHOGENIC or LIKELY_PATHOGENIC ACMG classification.""" for assignment in variant_acmg_assignments: - if variant == PhEvalVariantResult( + variant_copy = copy.deepcopy(variant) + variant_copy.grouping_id = None + if variant_copy == PhEvalVariantResult( chromosome=self._find_chromosome(assignment["variantEvaluation"]), start=self._find_start_pos(assignment["variantEvaluation"]), end=self._find_end_pos(assignment["variantEvaluation"]), @@ -116,6 +147,24 @@ def _filter_for_acmg_assignments( ): return True + @staticmethod + def _define_grouping_id(): + """Define a unique id for grouping results.""" + return str(uuid.uuid4()) + + def add_or_find_variant_group(self, score: float, gene_symbol: str, moi: int) -> str: + """ + Retrieves the grouping ID for a variant based on score, gene symbol, and mode of inheritance (MOI). + If no grouping ID exists for the specified combination, a new ID is generated, stored, and returned. + """ + key = f"{moi}|{gene_symbol}|{score}" + if key in self.variant_grouping: + return self.variant_grouping[key] + else: + grouping_id = self._define_grouping_id() + self.variant_grouping[key] = grouping_id + return grouping_id + def extract_pheval_variant_requirements( self, use_acmg_filter: bool = False ) -> [PhEvalVariantResult]: @@ -129,6 +178,11 @@ def extract_pheval_variant_requirements( contributing_variants = gene_hit["contributingVariants"] variant_acmg_assignments = gene_hit["acmgAssignments"] for cv in contributing_variants: + grouping_id = self.add_or_find_variant_group( + score, + self._find_gene_symbol(result_entry), + ModeOfInheritance[self._find_mode_of_inheritance(gene_hit)].value, + ) variant = PhEvalVariantResult( chromosome=self._find_chromosome(cv), start=self._find_start_pos(cv), @@ -136,6 +190,7 @@ def extract_pheval_variant_requirements( ref=self._find_ref(cv), alt=self._find_alt(cv), score=score, + grouping_id=grouping_id, ) if use_acmg_filter and self._filter_for_acmg_assignments( variant, score, variant_acmg_assignments @@ -156,7 +211,7 @@ def _find_disease_name(result_entry: dict) -> str: return result_entry["diseaseTerm"] @staticmethod - def _find_disease_identifier(result_entry: dict) -> int: + def _find_disease_identifier(result_entry: dict) -> str: """Return disease ID from Exomiser result entry.""" return result_entry["diseaseId"] diff --git a/tests/test_post_process_results_format.py b/tests/test_post_process_results_format.py index 4c18414..c82d6ba 100644 --- a/tests/test_post_process_results_format.py +++ b/tests/test_post_process_results_format.py @@ -1,5 +1,6 @@ import unittest from copy import copy +from unittest.mock import patch from pheval.post_processing.post_processing import ( PhEvalDiseaseResult, @@ -2665,31 +2666,71 @@ def test__filter_for_acmg_assignments_uncertain_significance(self): ) ) - def test_extract_pheval_variant_requirements(self): + @patch.object( + PhEvalVariantResultFromExomiserJsonCreator, + "add_or_find_variant_group", + return_value="mocked_id", + ) + def test_extract_pheval_variant_requirements(self, mock_add_or_find_variant_group): self.assertEqual( self.json_result.extract_pheval_variant_requirements(), [ PhEvalVariantResult( - chromosome="3", start=126730873, end=126730873, ref="G", alt="A", score=0.0484 + chromosome="3", + start=126730873, + end=126730873, + ref="G", + alt="A", + score=0.0484, + grouping_id="mocked_id", ), PhEvalVariantResult( - chromosome="3", start=126730873, end=126730873, ref="G", alt="A", score=0.0484 + chromosome="3", + start=126730873, + end=126730873, + ref="G", + alt="A", + score=0.0484, + grouping_id="mocked_id", ), PhEvalVariantResult( - chromosome="3", start=126741108, end=126741108, ref="G", alt="A", score=0.0484 + chromosome="3", + start=126741108, + end=126741108, + ref="G", + alt="A", + score=0.0484, + grouping_id="mocked_id", ), ], ) - def test_extract_pheval_variant_requirements_filter_acmg(self): + @patch.object( + PhEvalVariantResultFromExomiserJsonCreator, + "add_or_find_variant_group", + return_value="mocked_id", + ) + def test_extract_pheval_variant_requirements_filter_acmg(self, mock_add_or_find_variant_group): self.assertEqual( self.json_result.extract_pheval_variant_requirements(True), [ PhEvalVariantResult( - chromosome="3", start=126730873, end=126730873, ref="G", alt="A", score=0.0484 + chromosome="3", + start=126730873, + end=126730873, + ref="G", + alt="A", + score=0.0484, + grouping_id="mocked_id", ), PhEvalVariantResult( - chromosome="3", start=126741108, end=126741108, ref="G", alt="A", score=0.0484 + chromosome="3", + start=126741108, + end=126741108, + ref="G", + alt="A", + score=0.0484, + grouping_id="mocked_id", ), ], ) diff --git a/tox.ini b/tox.ini index b232ee1..d447ee9 100644 --- a/tox.ini +++ b/tox.ini @@ -12,7 +12,6 @@ envlist = [testenv] commands = - pip install -e ../pheval coverage run -p -m pytest --durations=20 {posargs:tests} coverage combine coverage xml