From c773e0d2d156c2493104d2a56dbbdb4068c88dcc Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 6 Nov 2023 09:16:39 -0800 Subject: [PATCH] Contig stitcher: ensure the order of stitched contigs Instead of appending the newly stitched part to the end, prepend it at the start. This way we make sure that it will be processed on the next loop cycle. --- micall/core/contig_stitcher.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/micall/core/contig_stitcher.py b/micall/core/contig_stitcher.py index f4a057ec6..8f33a7d2d 100644 --- a/micall/core/contig_stitcher.py +++ b/micall/core/contig_stitcher.py @@ -199,13 +199,14 @@ def stitch_contigs(contigs: Iterable[GenotypedContig]): stitched = yield from (x for x in aligned if not isinstance(x, AlignedContig)) aligned = [x for x in aligned if isinstance(x, AlignedContig)] + # Going left-to-right through aligned contigs. + aligned = list(sorted(aligned, key=lambda x: x.alignment.r_st)) while aligned: - # Going left-to-right through aligned parts. - current = min(aligned, key=lambda x: x.alignment.r_st) - aligned.remove(current) + current = aligned.pop(0) # Filter out all contigs that are contained within the current one. # TODO: actually filter out if covered by multiple contigs + # TODO: split contigs that have big gaps in them first, otherwise they will cover too much. aligned = [x for x in aligned if not \ interval_contains((current.alignment.r_st, current.alignment.r_ei), (x.alignment.r_st, x.alignment.r_ei))] @@ -216,7 +217,7 @@ def stitch_contigs(contigs: Iterable[GenotypedContig]): yield current continue - # Get overlaping regions + # Replace two contigs by their stitched version, then loop with it. new_contig = stitch_2_contigs(current, overlapping_contig) aligned.remove(overlapping_contig) - aligned.append(new_contig) + aligned.insert(0, new_contig)