diff --git a/micall/core/contig_stitcher.py b/micall/core/contig_stitcher.py index cf4c382fa..150022506 100644 --- a/micall/core/contig_stitcher.py +++ b/micall/core/contig_stitcher.py @@ -287,9 +287,8 @@ def find_most_covered(contigs) -> Optional[AlignedContig]: def split_contigs_with_gaps(contigs: List[AlignedContig]) -> Iterable[AlignedContig]: def covered_by(gap, contig): # TODO(vitalik): implement the more precise check - possible_reference_coordinates = set(range(gap.r_st, gap.r_ei + 1)) - return possible_reference_coordinates \ - .issubset(contig.alignment.coordinate_mapping.reference_coordinates()) + return gap.coordinate_mapping.all_reference_coordinates() \ + .issubset(contig.alignment.coordinate_mapping.mapped_reference_coordinates()) def covered(contig, gap): return any(covered_by(gap, other) for other in contigs diff --git a/micall/tests/test_cigar_tools.py b/micall/tests/test_cigar_tools.py index b61912207..65591f539 100644 --- a/micall/tests/test_cigar_tools.py +++ b/micall/tests/test_cigar_tools.py @@ -30,13 +30,13 @@ ('5M3I4M', {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 8, 6: 9, 7: 10, 8: 11}, {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 8, 6: 9, 7: 10, 8: 11}), ('1M1D', {0: 0}, - {0: 0}), + {0: 0, 1: 0}), ('1M1I', {0: 0}, {0: 0}), ('1I1M', {0: 1}, {0: 1}), ('1D1M', {1: 0}, - {1: 0}), + {1: 0, 0: 0}), # Multiple deletions and insertions ('2M2D2M2I2M', {0: 0, 1: 1, 4: 2, 5: 3, 6: 6, 7: 7}, @@ -62,7 +62,8 @@ def test_cigar_to_coordinate_mapping(cigar_str, expected_mapping): mapping = Cigar.coerce(cigar_str).coordinate_mapping assert expected_mapping == mapping.ref_to_query_d - assert expected_mapping == {i: mapping.ref_to_query(i) for i in mapping.reference_coordinates()} + assert expected_mapping == {i: mapping.ref_to_query(i) + for i in mapping.mapped_reference_coordinates()} @pytest.mark.parametrize("cigar_str", [x[0] for x in cigar_mapping_cases]) @@ -86,9 +87,8 @@ def test_cigar_to_closest_coordinate_mapping(cigar_str, expected_closest_mapping mapping.ref_to_closest_query(0) else: - fullrange = {i: mapping.ref_to_closest_query(i) \ - for i in range(min(mapping.reference_coordinates()), - 1 + max(mapping.reference_coordinates()))} + fullrange = {i: mapping.ref_to_closest_query(i) + for i in mapping.all_reference_coordinates()} assert expected_closest_mapping == fullrange @@ -103,7 +103,8 @@ def test_cigar_hit_to_coordinate_mapping(cigar_str, expected_mapping): assert mapping.ref_to_query(0) == None assert mapping.query_to_ref(0) == None assert expected_mapping \ - == {i: mapping.ref_to_query(i) for i in mapping.reference_coordinates()} + == {i: mapping.ref_to_query(i) + for i in mapping.mapped_reference_coordinates()} @pytest.mark.parametrize("cigar_str, expected_closest_mapping", [(x[0], x[2]) for x in cigar_mapping_cases]) @@ -119,9 +120,8 @@ def test_cigar_hit_to_coordinate_closest_mapping(cigar_str, expected_closest_map else: # Coordinates are translated by q_st and r_st. expected_closest_mapping = {k + hit.r_st: v + hit.q_st for (k, v) in expected_closest_mapping.items()} - fullrange = {i: mapping.ref_to_closest_query(i) \ - for i in range(min(mapping.reference_coordinates()), - 1 + max(mapping.reference_coordinates()))} + fullrange = {i: mapping.ref_to_closest_query(i) + for i in mapping.all_reference_coordinates()} assert expected_closest_mapping == fullrange diff --git a/micall/utils/cigar_tools.py b/micall/utils/cigar_tools.py index 0de61b76f..48b428e03 100644 --- a/micall/utils/cigar_tools.py +++ b/micall/utils/cigar_tools.py @@ -60,14 +60,22 @@ def extend(self, self.query_to_op_d[query_index] = op_index - def reference_coordinates(self) -> Set[int]: + def mapped_reference_coordinates(self) -> Set[int]: return set(self.ref_to_query_d.keys()) - def query_coordinates(self) -> Set[int]: + def all_reference_coordinates(self) -> Set[int]: + return set(self.ref_to_op_d.keys()) + + + def mapped_query_coordinates(self) -> Set[int]: return set(self.query_to_ref_d.keys()) + def all_query_coordinates(self) -> Set[int]: + return set(self.query_to_op_d.keys()) + + def ref_to_query(self, index) -> Optional[int]: return self.ref_to_query_d.get(index, None) @@ -373,8 +381,8 @@ def intervals_overlap(x, y): def gaps(self) -> Iterable['CigarHit']: # TODO(vitalik): memoize whatever possible. - covered_coordinates = self.coordinate_mapping.reference_coordinates() - all_coordinates = range(self.r_st, self.r_ei + 1) + covered_coordinates = self.coordinate_mapping.mapped_reference_coordinates() + all_coordinates = self.coordinate_mapping.all_reference_coordinates() def make_gap(r_st, r_en): r_ei = r_en - 1