From 11af3609a06235ac8a702c749166878a220f8964 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 10 Nov 2023 17:56:01 -0800 Subject: [PATCH] Cigar tools: improve semantics of "closest" It is now based on op index. --- micall/core/contig_stitcher.py | 6 +++-- micall/tests/test_cigar_tools.py | 4 +-- micall/utils/cigar_tools.py | 44 ++++++++++++++++++++------------ 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/micall/core/contig_stitcher.py b/micall/core/contig_stitcher.py index 0719c51ac..fe77cd85f 100644 --- a/micall/core/contig_stitcher.py +++ b/micall/core/contig_stitcher.py @@ -297,8 +297,10 @@ def covered(contig, gap): def gap_boundaries(gap): midpoint = gap.r_st + (gap.r_ei - gap.r_st) / 2 left_slice, right_slice = contig.cut_reference(floor(midpoint) + 0.5) - left_closest_query = left_slice.alignment.coordinate_mapping.ref_to_closest_query(midpoint) - right_closest_query = right_slice.alignment.coordinate_mapping.ref_to_closest_query(midpoint) + left_midpoint_ref = left_slice.alignment.coordinate_mapping.find_closest_ref(midpoint) + left_closest_query = left_slice.alignment.coordinate_mapping.ref_to_closest_query(left_midpoint_ref) + right_midpoint_ref = right_slice.alignment.coordinate_mapping.find_closest_ref(midpoint) + right_closest_query = right_slice.alignment.coordinate_mapping.ref_to_closest_query(right_midpoint_ref) left_closest_ref = left_slice.alignment.coordinate_mapping.query_to_ref(left_closest_query) right_closest_ref = right_slice.alignment.coordinate_mapping.query_to_ref(right_closest_query) return (left_closest_ref, right_closest_ref) diff --git a/micall/tests/test_cigar_tools.py b/micall/tests/test_cigar_tools.py index 65591f539..eac0a0b19 100644 --- a/micall/tests/test_cigar_tools.py +++ b/micall/tests/test_cigar_tools.py @@ -51,8 +51,8 @@ {0: 0, 1: 1, 2: 2, 3: 3, 4: 3, 5: 3, 6: 4, 7: 4, 8: 5}), # Edge cases - ('', {}, ValueError()), - ('12I', {}, ValueError()), + ('', {}, KeyError()), + ('12I', {}, KeyError()), ('12D', {}, ValueError()), ] diff --git a/micall/utils/cigar_tools.py b/micall/utils/cigar_tools.py index 48b428e03..3f6d40eb3 100644 --- a/micall/utils/cigar_tools.py +++ b/micall/utils/cigar_tools.py @@ -47,17 +47,16 @@ def __init__(self): def extend(self, ref_index: Optional[int], query_index: Optional[int], - op_index: Optional[int]): + op_index: int): if ref_index is not None and query_index is not None: self.ref_to_query_d[ref_index] = query_index self.query_to_ref_d[query_index] = ref_index - if op_index is not None: - if ref_index is not None: - self.ref_to_op_d[ref_index] = op_index - if query_index is not None: - self.query_to_op_d[query_index] = op_index + if ref_index is not None: + self.ref_to_op_d[ref_index] = op_index + if query_index is not None: + self.query_to_op_d[query_index] = op_index def mapped_reference_coordinates(self) -> Set[int]: @@ -84,27 +83,40 @@ def query_to_ref(self, index) -> Optional[int]: return self.query_to_ref_d.get(index, None) + def ref_to_leftsup_query(self, index) -> Optional[int]: + left_neihbourhood = (k for (k, v) in self.query_to_ref_d.items() if v <= index) + return max(left_neihbourhood, default=None) + + + def ref_to_rightinf_query(self, index) -> Optional[int]: + right_neihbourhood = (k for (k, v) in self.query_to_ref_d.items() if index <= v) + return min(right_neihbourhood, default=None) + + + @staticmethod + def _find_closest(collection, value) -> int: + return min(collection, key=lambda x: abs(x - value)) + + @staticmethod def _find_closest_key(mapping: dict, index: int) -> int: return min(mapping, key=lambda k: abs(mapping[k] - index)) - def ref_to_closest_query(self, index) -> int: - return CoordinateMapping._find_closest_key(self.query_to_ref_d, index) + def find_closest_ref(self, index) -> int: + return CoordinateMapping._find_closest(self.all_reference_coordinates(), index) - def query_to_closest_ref(self, index) -> int: - return CoordinateMapping._find_closest_key(self.ref_to_query_d, index) + def find_closest_query(self, index) -> int: + return CoordinateMapping._find_closest(self.all_query_coordinates(), index) - def ref_to_leftsup_query(self, index) -> Optional[int]: - left_neihbourhood = (k for (k, v) in self.query_to_ref_d.items() if v <= index) - return max(left_neihbourhood, default=None) + def ref_to_closest_query(self, index) -> int: + return CoordinateMapping._find_closest_key(self.query_to_op_d, self.ref_to_op_d[index]) - def ref_to_rightinf_query(self, index) -> Optional[int]: - right_neihbourhood = (k for (k, v) in self.query_to_ref_d.items() if index <= v) - return min(right_neihbourhood, default=None) + def query_to_closest_ref(self, index) -> int: + return CoordinateMapping._find_closest_key(self.ref_to_op_d, self.query_to_op_d[index]) def ref_or_query_to_op(self, ref_index: int, query_index: int, conflict):