Skip to content

Commit

Permalink
initial realign private implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
lrauschning committed Nov 15, 2024
1 parent 999c403 commit 51bc687
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions msyd/pyxfiles/realignment.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ from syri.writeout import getsrtable
import msyd.util as util
import msyd.cigar as cigar
import msyd.intersection as intersection
import msyd.priv as priv
import msyd.io as io
from msyd.multisyn import Multisyn
from msyd.multisyn import Multisyn, Private
from msyd.coords import Range


Expand Down Expand Up @@ -118,6 +119,23 @@ cpdef construct_mts(merasyns, old, syn):
## private regions will be at least MIN_REALIGN long
## => kind of enforced on ref anyway?

cpdef mt_to_privates(mt, org, chrom):
"""
Takes a mappingtree, and returns a DataFrame of all intervals left in it as Private objects.
This fn is used to extract leftover non-realigning regions to annotate as private.
:args:
:mt: input mappingtree, typically obtained from subtract_mappingtrees
:org: organism mt corresponds to
:chrom: chromosome we are working on
"""
cdef ret = deque()
for entry in mappingtree:
if entry.end - entry.start >= priv.MIN_PRIV_THRESH:
ret.append(Private(org, chrom, entry.data, entry.data + entry.end - entry.start))

return pd.DataFrame(list(ret))


cpdef subtract_mts(mappingtrees, merasyns, skip_ref=True):
"""
Takes a dict containing an `Intervaltree` with offsets for each organism to be realigned (as produced by `construct_mts`), and returns new mappingtrees with regions covered by multisyn objects in `merasyns` subtracted.
Expand Down Expand Up @@ -651,12 +669,15 @@ cdef process_gaps(df, qrynames, fastas, mp_preset='asm20', ncores=1, annotate_pr

## recalculate mappingtrees from current merasyns to remove newly found merasynteny
logger.debug(f"Old Mappingtrees: {mappingtrees}.\n Adding {merasyns[ref]}.")
mappingtrees = subtract_mts(mappingtrees, merasyns[ref], skip_ref=True)
mappingtrees = subtract_mts(mappingtrees, merasyns[ref], skip_ref=not annotate_private)
logger.debug(f"New Mappingtrees: {mappingtrees}")

# remove all orgs that have already been used as a reference
for reforg in merasyns:
if reforg in mappingtrees:
if annotate_private:
merasyns[ref] = util.merge(merasyns[ref], mt_to_privates(mappingtrees[reforg], reforg, syn.ref.chr))
#TODO handle early exit cases
del mappingtrees[reforg]

## extract the remaining sequences for future realignment
Expand Down

0 comments on commit 51bc687

Please sign in to comment.