Skip to content

Commit

Permalink
Contig stitcher: add the stitch_consensus function
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Nov 14, 2023
1 parent 751d8f2 commit 57805fa
Show file tree
Hide file tree
Showing 2 changed files with 180 additions and 63 deletions.
37 changes: 28 additions & 9 deletions micall/core/contig_stitcher.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Iterable, Optional, Tuple, List
from collections import deque
from collections import deque, defaultdict
from dataclasses import dataclass
from math import ceil
from mappy import Aligner
Expand Down Expand Up @@ -61,20 +61,22 @@ def cut_query(self, cut_point: float) -> Tuple['GenotypedContig', 'GenotypedCont
class AlignedContig(GenotypedContig):

def __init__(self, query: GenotypedContig, alignment: CigarHit):
self.alignment = alignment
self.query = query

ref_msa, query_msa = self.alignment.to_msa(self.query.ref_seq, self.query.seq)
seq = ''.join((c for c in query_msa if c != '-'))

self.alignment = alignment
super().__init__(
seq = seq,
seq = query.seq,
name = query.name,
ref_name = query.ref_name,
ref_seq = query.ref_seq,
matched_fraction = query.matched_fraction)


@cached_property
def aligned_seq(self):
ref_msa, query_msa = self.alignment.to_msa(self.query.ref_seq, self.query.seq)
return ''.join((c for c in query_msa if c != '-'))


def cut_reference(self, cut_point: float) -> Tuple['AlignedContig', 'AlignedContig']:
""" Cuts this alignment in two parts with cut_point between them. """

Expand Down Expand Up @@ -165,8 +167,8 @@ def cut_reference(self, cut_point: float) -> 'FrankensteinContig':

@staticmethod
def munge(left: AlignedContig, right: AlignedContig) -> AlignedContig:
left_query_seq = left.query.seq[0:left.alignment.q_ei + 1]
right_query_seq = right.query.seq[right.alignment.q_st:]
left_query_seq = left.seq[0:left.alignment.q_ei + 1]
right_query_seq = right.seq[right.alignment.q_st:]
query_seq = left_query_seq + right_query_seq

left_alignment = left.alignment
Expand Down Expand Up @@ -417,3 +419,20 @@ def stitch_contigs(contigs: Iterable[GenotypedContig]) -> Iterable[AlignedContig
aligned = drop_completely_covered(aligned)
aligned = combine_overlaps(aligned)
yield from aligned


def stitch_consensus(contigs: Iterable[GenotypedContig]) -> Iterable[GenotypedContig]:
contigs = list(stitch_contigs(contigs))
consensus_parts = defaultdict(list) # ref_name -> List[AlignedContig]

for contig in contigs:
if isinstance(contig, AlignedContig):
consensus_parts[contig.ref_name].append(contig)
else:
yield contig

Check warning on line 432 in micall/core/contig_stitcher.py

View check run for this annotation

Codecov / codecov/patch

micall/core/contig_stitcher.py#L432

Added line #L432 was not covered by tests

def combine(contigs):
contigs = sorted(contigs, key=lambda x: x.alignment.r_st)
return FrankensteinContig(contigs)

yield from map(combine, consensus_parts.values())
Loading

0 comments on commit 57805fa

Please sign in to comment.