Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made insertion motif optional, expanded valid insertion sequence check and added support for target site deletions #216

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions bin/addsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,18 +528,21 @@ def makemut(args, bedline, alignopts):
assert len(a) > 1 # insertion syntax: INS <file.fa> [optional TSDlen]
insseqfile = a[1]
if not (os.path.exists(insseqfile) or insseqfile == 'RND' or insseqfile.startswith('INSLIB:')): # not a file... is it a sequence? (support indel ins.)
assert re.search('^[ATGCatgc]*$',insseqfile), "cannot determine SV type: %s" % insseqfile # make sure it's a sequence
assert re.search('^[ATGCURYKMSWBDHVNatgcurykmswbdhvn]*$',insseqfile), "cannot determine SV type: %s" % insseqfile # make sure it's a sequence
insseq = insseqfile.upper()
insseqfile = None
if len(a) > 2: # field 5 for insertion is TSD Length
tsdlen = int(a[2])

if len(a) > 3: # field 6 for insertion is motif, format = 'NNNN^NNNN where ^ is cut site
ins_motif = a[3]
assert '^' in ins_motif, 'insertion motif specification requires cut site defined by ^'
if len(a) > 3:
try: # field 6 for VAF in case of floating point. This is the end of the fields
svfrac = float(a[3])/cn
except: # otherwise is insertion motif, format = 'NNNN^NNNN where ^ is cut site
ins_motif = a[3]
assert '^' in ins_motif, 'insertion motif specification requires cut site defined by ^'

if len(a) > 4: # field 7 is VAF
svfrac = float(a[4])/cn
if len(a) > 4: # field 7 is VAF when field 6 is insertion motif
svfrac = float(a[4])/cn

if action == 'DUP':
if len(a) > 1:
Expand Down
9 changes: 6 additions & 3 deletions bin/bamsurgeon/mutableseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ def deletion(self, start, end):
self.seq = self.seq[:start] + self.seq[end:]

def insertion(self, loc, seq, tsdlen=0):
    ''' inserts seq after position loc, adds target site duplication (tsd) if tsdlen > 0 and deletion if tsdlen < 0

    loc    -- offset into self.seq at which seq is spliced in (coerced with int())
    seq    -- sequence to insert
    tsdlen -- > 0: the tsdlen bases starting at loc are copied in front of seq,
              so they appear on both sides of the insertion;
              0: plain insertion;
              < 0: the abs(tsdlen) bases starting at loc are removed

    Modifies self.seq in place; returns nothing.
    '''
    loc = int(loc)
    if tsdlen >= 0:
        # self.seq[loc:] still begins with the same bases as tsd, which is
        # what leaves the duplicated copy flanking the insert on both sides.
        tsd = self.seq[loc:loc + tsdlen]
        self.seq = self.seq[:loc] + tsd + seq + self.seq[loc:]
    else:
        # tsdlen is negative here, so loc - tsdlen == loc + abs(tsdlen):
        # resume the original sequence past the deleted target-site bases.
        self.seq = self.seq[:loc] + seq + self.seq[loc - tsdlen:]

def inversion(self, start, end):
''' inverts sequence between start and end, bases at start and end positions are not affected '''
Expand Down