forked from fjruizruano/ngs-protocols
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rm_count_matches_monomers.py
executable file
·50 lines (40 loc) · 1.12 KB
/
rm_count_matches_monomers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python
import sys
from Bio import SeqIO
# Tally the records of a FASTA file per annotation and write a tab-separated
# summary next to the input file.
#
# Each record ID is split on "_" and its first field is used as the
# annotation key (an ID without any underscore is keyed by the whole ID).
# For every annotation two [record_count, summed_sequence_length] pairs are
# kept: one for sequences longer than LENGTH_CUTOFF (the DIM_* columns) and
# one for all remaining sequences (the NODIM_* columns).
#
# Input : path to a FASTA file, taken from the first command-line argument
#         or, when that is missing, asked for interactively.
# Output: "<FastaFile>.counts" with one header line and one line per
#         annotation.
print("Usage: rm_count_matches.py FastaFile\n")

# Sequences strictly longer than this are tallied in the DIM_* columns;
# everything else goes to the NODIM_* columns.
LENGTH_CUTOFF = 89

try:
    fasta = sys.argv[1]
except IndexError:
    # No command-line argument was given: fall back to an interactive prompt.
    # raw_input only exists on Python 2; on Python 3 the equivalent builtin
    # is input, so pick whichever one is available.
    try:
        prompt = raw_input
    except NameError:
        prompt = input
    fasta = prompt("Introduce FastaFile: ")

# annotation -> [number of records, summed sequence length]
counter_dim = {}
counter_nodim = {}

# The context manager guarantees the input handle is closed even if parsing
# fails half-way through the file.
with open(fasta) as handle:
    for record in SeqIO.parse(handle, "fasta"):
        seq_len = len(record.seq)
        annot = str(record.id).split("_")[0]
        if annot not in counter_dim:
            # Create both entries together so every annotation always has a
            # DIM and a NODIM pair when the table is written.
            counter_dim[annot] = [0, 0]
            counter_nodim[annot] = [0, 0]
        if seq_len > LENGTH_CUTOFF:
            tally = counter_dim[annot]
        else:
            tally = counter_nodim[annot]
        tally[0] += 1
        tally[1] += seq_len

# Closing the output file explicitly (via "with") makes sure the table is
# flushed to disk before the script exits.
with open(fasta + ".counts", "w") as out:
    out.write("Annotation\tDIM_N\tDIM_MON\tNODIM_N\tNODIM_MON\n")
    for annot in counter_dim:
        values = counter_dim[annot] + counter_nodim[annot]
        fields = [annot] + [str(value) for value in values]
        out.write("\t".join(fields) + "\n")