-
Notifications
You must be signed in to change notification settings - Fork 5
/
blat_recursive_hard.py
executable file
·54 lines (45 loc) · 1.43 KB
/
blat_recursive_hard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python
import sys
from subprocess import call, Popen
from os import listdir
from os.path import isfile, join
print "\nUsage: blat_recursive.py NumberOfThreads ListOfSequences Reference\n"
try:
threads = sys.argv[1]
except:
threads = raw_input("Introduce number of threads: ")
try:
li = sys.argv[2]
except:
li = raw_input("Introduce list name: ")
try:
db = sys.argv[3]
except:
db = raw_input("Introduce DB in FASTA format: ")
thr = int(threads)
lis = open(li).readlines()
for file in lis:
file = file[:-1]
call("FastA.split.pl %s tmp_queries %s" % (file, thr), shell=True)
# call("faSplit sequence %s %s tmp_queries_" % (file, thr), shell=True)
onlyfiles = [f for f in listdir(".") if isfile(join(".",f))]
splits = []
for f in onlyfiles:
if f.startswith("tmp_queries") and f.endswith(".fa"):
splits.append(f)
splits.sort()
commands = []
for n in range(0,len(splits)):
com = "blat -noHead -minIdentity=95 %s %s %s" % (db,splits[n],splits[n]+".blat")
commands.append(com)
processes = [Popen(cmd, shell=True) for cmd in commands]
for p in processes:
p.wait()
w = open(file+".blat", "w")
blat = open(splits[0]+".blat").readlines()
w.write("".join(blat))
for n in range(1,len(splits)):
blat = open(splits[n]+".blat").readlines()
w.write("".join(blat[5:]))
w.close()
call("rm tmp_queries*", shell=True)