From bf2e50e81aa3cb60067b215844cbe008c885e408 Mon Sep 17 00:00:00 2001
From: Vitaliy Mysak
Date: Tue, 7 Nov 2023 08:44:26 -0800
Subject: [PATCH] Add MockAligner class to test utils

---
Review notes (free text between the three-dash line and the first diff is
not part of the commit):
- utils.py: in MockAligner.map the local `al` is assigned but never read;
  the yielded MockAlignment is built separately from the same arguments.
- utils.py: `floor` and `ceil` are imported but not used in this file.
- test_tests_utils.py: `pytest` is imported but not referenced, and
  `alignment.is_rev == False` could be written as `not alignment.is_rev`.
- NOTE(review): line breaks and indentation below were restored from a
  whitespace-collapsed copy; confirm against the original commit.

 micall/tests/test_tests_utils.py | 75 ++++++++++++++++++++++++++++++++
 micall/tests/utils.py            | 56 ++++++++++++++++++++++++
 2 files changed, 131 insertions(+)
 create mode 100644 micall/tests/test_tests_utils.py
 create mode 100644 micall/tests/utils.py

diff --git a/micall/tests/test_tests_utils.py b/micall/tests/test_tests_utils.py
new file mode 100644
index 000000000..047868475
--- /dev/null
+++ b/micall/tests/test_tests_utils.py
@@ -0,0 +1,75 @@
+
+import pytest
+from micall.tests.utils import MockAligner, MockAlignment
+
+def test_basic_mapping():
+
+    aligner = MockAligner('acgt' + 'a' * 20 + 'acgt')
+
+    alignment = list(aligner.map('a' * 10))
+
+    assert len(alignment) == 5
+
+    alignment = alignment[0]
+
+    assert isinstance(alignment, MockAlignment)
+    assert alignment.mapq == 60
+    assert alignment.is_rev == False
+    assert alignment.r_st == 4
+    assert alignment.r_en == 14
+    assert alignment.q_st == 0
+    assert alignment.q_en == 10
+
+
+def test_exact_match():
+    aligner = MockAligner("abcdefg")
+    alignments = list(aligner.map("abc"))
+    assert len(alignments) == 1
+    assert alignments[0].r_st == 0
+    assert alignments[0].r_en == 3
+
+
+def test_no_match():
+    aligner = MockAligner("abcdefg")
+    alignments = list(aligner.map("xyz"))
+    assert len(alignments) == 0
+
+
+def test_partial_match():
+    aligner = MockAligner("abcdefg")
+    alignments = list(aligner.map("abxyabc"))
+    assert len(alignments) == 1
+    assert alignments[0].r_st == 0
+    assert alignments[0].r_en == 3
+
+
+def test_multiple_matches():
+    aligner = MockAligner("A" * 40)
+    alignments = list(aligner.map("A" * 20))
+    assert len(alignments) == 5
+    assert alignments[0].r_st == 0
+    assert alignments[0].r_en == 20
+    assert alignments[1].r_st == 0
+    assert alignments[1].r_en == 19
+
+
+def test_multiple_matches_bigger_query():
+    aligner = MockAligner("A" * 40)
+    alignments = list(aligner.map("A" * 50))
+    assert len(alignments) == 5
+    assert alignments[0].r_st == 0
+    assert alignments[0].r_en == 40
+    assert alignments[1].r_st == 0
+    assert alignments[1].r_en == 40
+
+
+def test_empty_reference():
+    aligner = MockAligner("A" * 0)
+    alignments = list(aligner.map("A" * 20))
+    assert len(alignments) == 0
+
+
+def test_empty_query():
+    aligner = MockAligner("A" * 40)
+    alignments = list(aligner.map("A" * 0))
+    assert len(alignments) == 0
diff --git a/micall/tests/utils.py b/micall/tests/utils.py
new file mode 100644
index 000000000..1a569e1d5
--- /dev/null
+++ b/micall/tests/utils.py
@@ -0,0 +1,56 @@
+from dataclasses import dataclass
+from math import floor, ceil
+
+from micall.utils.consensus_aligner import CigarActions
+
+
+@dataclass
+class MockAlignment:
+    is_rev: bool
+    mapq: int
+    cigar: list
+    cigar_str: str
+    q_st: int
+    q_en: int
+    r_st: int
+    r_en: int
+
+
+class MockAligner:
+    """
+    Mock for the mappy's aligner class.
+    Only reports exact matches.
+    """
+
+    def __init__(self, seq, *args, **kwargs):
+        self.seq = seq
+        self.max_matches = 5
+        self.min_length = 3
+
+
+    def map(self, seq):
+        max_matches = self.max_matches
+        returned = set()
+        for length in range(len(seq), self.min_length - 2, -1):
+            for start in range(len(seq) - length):
+                end = start + length
+                substring = seq[start:end+1]
+                if substring not in self.seq:
+                    continue
+
+                mapq = 60
+                is_rev = False # Doesn't handle reverse complements in this mock.
+                r_st = self.seq.index(substring)
+                r_en = r_st + len(substring)
+                q_st = start
+                q_en = end + 1
+                cigar = [[q_en - q_st, CigarActions.MATCH]]
+                cigar_str = f'{(q_en - q_st)}M'
+                al = MockAlignment(is_rev, mapq, cigar, cigar_str, q_st, q_en, r_st, r_en)
+                if (q_st, q_en, r_st, r_en) not in returned:
+                    returned.add((q_st, q_en, r_st, r_en))
+                    yield MockAlignment(is_rev, mapq, cigar, cigar_str, q_st, q_en, r_st, r_en)
+
+                    max_matches -= 1
+                    if max_matches < 1:
+                        return