diff --git a/micall/tests/test_cigar_tools.py b/micall/tests/test_cigar_tools.py index 9c9db98d1..b61912207 100644 --- a/micall/tests/test_cigar_tools.py +++ b/micall/tests/test_cigar_tools.py @@ -323,6 +323,19 @@ def test_cigar_hit_rstrip_is_stringlike(hit): assert p(g(hit)) == h(p(hit)) +@pytest.mark.parametrize('hit', [x[0] for x in cigar_hit_ref_cut_cases + if not isinstance(x[2], Exception)]) +def test_cigar_hit_gaps_no_m_or_i(hit): + gaps = list(hit.gaps()) + + if 'D' in str(hit.cigar): + assert len(gaps) > 0 + + for gap in gaps: + assert 'M' not in str(gap.cigar) + assert 'I' not in str(gap.cigar) + + @pytest.mark.parametrize("reference_seq, query_seq, cigar, expected_reference, expected_query", [ ('ACTG', 'ACTG', '4M', 'ACTG', 'ACTG'), ('ACTG', '', '4D', 'ACTG', '----'), diff --git a/micall/utils/cigar_tools.py b/micall/utils/cigar_tools.py index 89ad2d708..0de61b76f 100644 --- a/micall/utils/cigar_tools.py +++ b/micall/utils/cigar_tools.py @@ -262,7 +262,7 @@ def rstrip(self, actions: Container[CigarActions]) -> 'Cigar': def coordinate_mapping(self) -> CoordinateMapping: """ Convert a CIGAR string to coordinate mapping representing a reference-to-query and query-to-reference coordinate mappings. - TODO: describe the domains and holes. + TODO(vitalik): describe the domains and holes. :param cigar: a CIGAR string. @@ -370,6 +370,32 @@ def intervals_overlap(x, y): or intervals_overlap((self.q_st, self.q_ei), (other.q_st, other.q_ei)) + def gaps(self) -> Iterable['CigarHit']: + # TODO(vitalik): memoize whatever possible. + + covered_coordinates = self.coordinate_mapping.reference_coordinates() + all_coordinates = range(self.r_st, self.r_ei + 1) + + def make_gap(r_st, r_en): + r_ei = r_en - 1 + left, midright = self.cut_reference(r_st - 0.5) + middle, right = midright.cut_reference(r_ei + 0.5) + return middle + + gap_start = None + for coord in all_coordinates: + if coord in covered_coordinates: + if gap_start is not None: + yield make_gap(gap_start, coord) + gap_start = None + else: + if gap_start is None: + gap_start = coord + + if gap_start is not None: + yield make_gap(gap_start, coord) + + def __add__(self, other): """ Inserts deletions/insertions between self and other,