Skip to content

Commit

Permalink
Add gaps() method to CigarHit
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Nov 10, 2023
1 parent 8b8443d commit 4bee555
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 1 deletion.
13 changes: 13 additions & 0 deletions micall/tests/test_cigar_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,19 @@ def test_cigar_hit_rstrip_is_stringlike(hit):
assert p(g(hit)) == h(p(hit))


@pytest.mark.parametrize('hit', [x[0] for x in cigar_hit_ref_cut_cases
if not isinstance(x[2], Exception)])
def test_cigar_hit_gaps_no_m_or_i(hit):
gaps = list(hit.gaps())

if 'D' in str(hit.cigar):
assert len(gaps) > 0

for gap in gaps:
assert 'M' not in str(gap.cigar)
assert 'I' not in str(gap.cigar)


@pytest.mark.parametrize("reference_seq, query_seq, cigar, expected_reference, expected_query", [
('ACTG', 'ACTG', '4M', 'ACTG', 'ACTG'),
('ACTG', '', '4D', 'ACTG', '----'),
Expand Down
28 changes: 27 additions & 1 deletion micall/utils/cigar_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def rstrip(self, actions: Container[CigarActions]) -> 'Cigar':
def coordinate_mapping(self) -> CoordinateMapping:
"""
Convert a CIGAR string to coordinate mapping representing a reference-to-query and query-to-reference coordinate mappings.
TODO: describe the domains and holes.
TODO(vitalik): describe the domains and holes.
:param cigar: a CIGAR string.
Expand Down Expand Up @@ -370,6 +370,32 @@ def intervals_overlap(x, y):
or intervals_overlap((self.q_st, self.q_ei), (other.q_st, other.q_ei))


def gaps(self) -> Iterable['CigarHit']:
# TODO(vitalik): memoize whatever possible.

covered_coordinates = self.coordinate_mapping.reference_coordinates()
all_coordinates = range(self.r_st, self.r_ei + 1)

def make_gap(r_st, r_en):
r_ei = r_en - 1
left, midright = self.cut_reference(r_st - 0.5)
middle, right = midright.cut_reference(r_ei + 0.5)
return middle

gap_start = None
for coord in all_coordinates:
if coord in covered_coordinates:
if gap_start is not None:
yield make_gap(gap_start, coord)
gap_start = None
else:
if gap_start is None:
gap_start = coord

if gap_start is not None:
yield make_gap(gap_start, coord)


def __add__(self, other):
"""
Inserts deletions/insertions between self and other,
Expand Down

0 comments on commit 4bee555

Please sign in to comment.