Skip to content

Commit

Permalink
Cigar tools: distinguish mapped coordinates and all coordinates
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Nov 11, 2023
1 parent d1f3a9d commit c389ee4
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 17 deletions.
5 changes: 2 additions & 3 deletions micall/core/contig_stitcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,8 @@ def find_most_covered(contigs) -> Optional[AlignedContig]:
def split_contigs_with_gaps(contigs: List[AlignedContig]) -> Iterable[AlignedContig]:
def covered_by(gap, contig):
# TODO(vitalik): implement the more precise check
possible_reference_coordinates = set(range(gap.r_st, gap.r_ei + 1))
return possible_reference_coordinates \
.issubset(contig.alignment.coordinate_mapping.reference_coordinates())
return gap.coordinate_mapping.all_reference_coordinates() \
.issubset(contig.alignment.coordinate_mapping.mapped_reference_coordinates())

def covered(contig, gap):
return any(covered_by(gap, other) for other in contigs
Expand Down
20 changes: 10 additions & 10 deletions micall/tests/test_cigar_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@
('5M3I4M', {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 8, 6: 9, 7: 10, 8: 11},
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 8, 6: 9, 7: 10, 8: 11}),
('1M1D', {0: 0},
{0: 0}),
{0: 0, 1: 0}),
('1M1I', {0: 0},
{0: 0}),
('1I1M', {0: 1},
{0: 1}),
('1D1M', {1: 0},
{1: 0}),
{1: 0, 0: 0}),

# Multiple deletions and insertions
('2M2D2M2I2M', {0: 0, 1: 1, 4: 2, 5: 3, 6: 6, 7: 7},
Expand All @@ -62,7 +62,8 @@ def test_cigar_to_coordinate_mapping(cigar_str, expected_mapping):
mapping = Cigar.coerce(cigar_str).coordinate_mapping

assert expected_mapping == mapping.ref_to_query_d
assert expected_mapping == {i: mapping.ref_to_query(i) for i in mapping.reference_coordinates()}
assert expected_mapping == {i: mapping.ref_to_query(i)
for i in mapping.mapped_reference_coordinates()}


@pytest.mark.parametrize("cigar_str", [x[0] for x in cigar_mapping_cases])
Expand All @@ -86,9 +87,8 @@ def test_cigar_to_closest_coordinate_mapping(cigar_str, expected_closest_mapping
mapping.ref_to_closest_query(0)

else:
fullrange = {i: mapping.ref_to_closest_query(i) \
for i in range(min(mapping.reference_coordinates()),
1 + max(mapping.reference_coordinates()))}
fullrange = {i: mapping.ref_to_closest_query(i)
for i in mapping.all_reference_coordinates()}
assert expected_closest_mapping == fullrange


Expand All @@ -103,7 +103,8 @@ def test_cigar_hit_to_coordinate_mapping(cigar_str, expected_mapping):
assert mapping.ref_to_query(0) == None
assert mapping.query_to_ref(0) == None
assert expected_mapping \
== {i: mapping.ref_to_query(i) for i in mapping.reference_coordinates()}
== {i: mapping.ref_to_query(i)
for i in mapping.mapped_reference_coordinates()}


@pytest.mark.parametrize("cigar_str, expected_closest_mapping", [(x[0], x[2]) for x in cigar_mapping_cases])
Expand All @@ -119,9 +120,8 @@ def test_cigar_hit_to_coordinate_closest_mapping(cigar_str, expected_closest_map
else:
# Coordinates are translated by q_st and r_st.
expected_closest_mapping = {k + hit.r_st: v + hit.q_st for (k, v) in expected_closest_mapping.items()}
fullrange = {i: mapping.ref_to_closest_query(i) \
for i in range(min(mapping.reference_coordinates()),
1 + max(mapping.reference_coordinates()))}
fullrange = {i: mapping.ref_to_closest_query(i)
for i in mapping.all_reference_coordinates()}
assert expected_closest_mapping == fullrange


Expand Down
16 changes: 12 additions & 4 deletions micall/utils/cigar_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,22 @@ def extend(self,
self.query_to_op_d[query_index] = op_index


def reference_coordinates(self) -> Set[int]:
def mapped_reference_coordinates(self) -> Set[int]:
return set(self.ref_to_query_d.keys())


def query_coordinates(self) -> Set[int]:
def all_reference_coordinates(self) -> Set[int]:
return set(self.ref_to_op_d.keys())


def mapped_query_coordinates(self) -> Set[int]:
return set(self.query_to_ref_d.keys())


def all_query_coordinates(self) -> Set[int]:
return set(self.query_to_op_d.keys())


def ref_to_query(self, index) -> Optional[int]:
return self.ref_to_query_d.get(index, None)

Expand Down Expand Up @@ -373,8 +381,8 @@ def intervals_overlap(x, y):
def gaps(self) -> Iterable['CigarHit']:
# TODO(vitalik): memoize whatever possible.

covered_coordinates = self.coordinate_mapping.reference_coordinates()
all_coordinates = range(self.r_st, self.r_ei + 1)
covered_coordinates = self.coordinate_mapping.mapped_reference_coordinates()
all_coordinates = self.coordinate_mapping.all_reference_coordinates()

def make_gap(r_st, r_en):
r_ei = r_en - 1
Expand Down

0 comments on commit c389ee4

Please sign in to comment.