forked from fjruizruano/ngs-protocols
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mreps_extract.py
executable file
·49 lines (41 loc) · 1.01 KB
/
mreps_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#! /usr/bin/python
"""Extract the repeat units listed in an MREPS output file.

The script prompts for the MREPS output file and for a tag that identifies
the lines carrying a sequence name.  It then writes two files next to the
input:

* ``<input>.fas``      -- one FASTA-style record per repeat unit, headed
  ``>name_k`` where ``k`` numbers the units of one table starting at 1.
* ``<input>.full.txt`` -- the sequence name, written once per table found.
"""

# Separator line that opens and closes each table of repeats:
# one space, 93 dashes and a newline.
MARK = " " + "-" * 93 + "\n"


def extract_repeats(lines, tag, mark=MARK):
    """Collect the repeat units of every table delimited by ``mark`` lines.

    Args:
        lines: The lines of the MREPS output, each with its trailing newline.
        tag: Substring identifying a line that carries a sequence name.
        mark: The exact separator line that opens and closes a table.

    Returns:
        A list of ``(name, units)`` tuples, one per table and in file order.
        ``name`` is the sequence name in force when the table opened and
        ``units`` is the list of repeat units read from its rows.

    Raises:
        ValueError: If a table opens before any line containing ``tag`` has
            been seen, so there is no sequence name to attach to it.
    """
    records = []
    seq_name = None  # most recent sequence name; None until a tag line is seen
    units = None     # unit list of the table being read; None outside a table

    # For each line in the file.
    for line in lines:
        # Store the sequence name whenever the tag appears in the line.
        # The slice drops a 21-character prefix and a 2-character suffix --
        # presumably "Processing sequence '" and the closing "'\n"; confirm
        # against real MREPS output.  Only the first word is kept.
        if tag in line:
            seq_name = line[21:-2].split(" ")[0]

        if line == mark:
            if units is not None:
                # Second separator of a pair: the table is finished.
                units = None
                continue
            if seq_name is None:
                raise ValueError(
                    "table separator found before any line containing tag %r"
                    % (tag,)
                )
            # First separator of a pair: start a new table.  The record is
            # registered right away so that an empty or unterminated table
            # still reports its sequence name.
            units = []
            records.append((seq_name, units))
        elif units is not None:
            # Table row: the units are the space-separated words after the
            # last double tab; the final piece (the line ending) is dropped.
            # Rows are read until the next separator or the end of the
            # input, with no limit on the number of rows per table.
            units.extend(line.split("\t\t")[-1].split(" ")[:-1])

    return records


def main():
    """Prompt for the input file and tag, then write the two output files."""
    try:
        prompt = raw_input  # Python 2
    except NameError:
        prompt = input  # Python 3

    path = prompt("MREPS output file: ")
    tag = prompt("Introduce tag (six first characters): ")

    with open(path) as handle:
        lines = handle.readlines()

    records = extract_repeats(lines, tag)

    # The "with" blocks guarantee that both outputs are flushed and closed.
    with open(path + ".fas", "w") as fasta:
        with open(path + ".full.txt", "w") as names:
            for name, units in records:
                names.write(name + "\n")
                for number, unit in enumerate(units, 1):
                    fasta.write(">%s_%s\n%s\n" % (name, number, unit))


if __name__ == "__main__":
    main()