diff --git a/micall/tests/test_contig_stitcher.py b/micall/tests/test_contig_stitcher.py index 9eb9b2edf..1ce6448b3 100644 --- a/micall/tests/test_contig_stitcher.py +++ b/micall/tests/test_contig_stitcher.py @@ -17,10 +17,10 @@ stitch_consensus, calculate_concordance, align_all_to_reference, - disambiguate_concordance, lstrip, rstrip, ) +from micall.utils.overlap_stitcher import disambiguate_concordance from micall.core.plot_contigs import plot_stitcher_coverage from micall.tests.utils import mock_align_consensus, MockAlignment, fixed_random_seed from micall.tests.test_fasta_to_csv import check_hcv_db, DEFAULT_DATABASE # activates the fixture diff --git a/micall/utils/overlap_stitcher.py b/micall/utils/overlap_stitcher.py index 59b21cb03..54d80926b 100644 --- a/micall/utils/overlap_stitcher.py +++ b/micall/utils/overlap_stitcher.py @@ -1,5 +1,6 @@ from fractions import Fraction from typing import Sequence, Iterator, Tuple +from operator import itemgetter from gotoh import align_it @@ -76,4 +77,9 @@ def disambiguate_concordance(concordance: Sequence[Fraction], yield x, global_rank -# def get_max_ +def sort_concordance_indexes(concordance: Sequence[Fraction]) -> Sequence[int]: + concordance_d = disambiguate_concordance(concordance) + return tuple(i for i, v in sorted(enumerate(concordance_d), + key=itemgetter(1), + reverse=True, + )) diff --git a/micall/utils/referencefull_contig_stitcher.py b/micall/utils/referencefull_contig_stitcher.py index 0bb26c0bc..ed094bfe1 100644 --- a/micall/utils/referencefull_contig_stitcher.py +++ b/micall/utils/referencefull_contig_stitcher.py @@ -10,7 +10,6 @@ from Bio import Seq import logging from fractions import Fraction -from operator import itemgetter from aligntools import CigarHit, connect_nonoverlapping_cigar_hits, drop_overlapping_cigar_hits, CigarActions from micall.core.project_config import ProjectConfig @@ -18,7 +17,7 @@ from micall.utils.contig_stitcher_context import context, StitcherContext from micall.utils.contig_stitcher_contigs import GenotypedContig, AlignedContig from micall.utils.consensus_aligner import align_consensus -from micall.utils.overlap_stitcher import align_queries, calculate_concordance, disambiguate_concordance +from micall.utils.overlap_stitcher import align_queries, calculate_concordance, sort_concordance_indexes import micall.utils.contig_stitcher_events as events @@ -281,11 +280,7 @@ def find_overlapping_contig(self: AlignedContig, aligned_contigs): def concordance_to_cut_points(left_overlap, right_overlap, aligned_left, aligned_right, concordance): """ Determine optimal cut points for stitching based on sequence concordance in the overlap region. """ - concordance_d = list(disambiguate_concordance(concordance)) - sorted_concordance_indexes = [i for i, v in sorted(enumerate(concordance_d), - key=itemgetter(1), - reverse=True, - )] + sorted_concordance_indexes = sort_concordance_indexes(concordance) def remove_dashes(s: str): return s.replace('-', '')