forked from fjruizruano/satminer
-
Notifications
You must be signed in to change notification settings - Fork 1
/
SE_rexp_prepare.py
executable file
·117 lines (91 loc) · 3.02 KB
/
SE_rexp_prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/python
import sys, os
from subprocess import call
from random import randint
print "\nUsage: SE_rexp_prepare.py NumberOfPairedReads File.fastq MinQual MinLen [PREFIX]\n"
##read parameters
#number of reads
try:
sel_reads = sys.argv[1]
sel_reads = int(sel_reads)
except:
sel_reads = raw_input("Number of reads you want select: ")
sel_reads = int(sel_reads)
#FASTQ files
try:
r1 = sys.argv[2]
except:
r1 = raw_input("FASTQ file 1: ")
try:
mq = sys.argv[3]
ml = sys.argv[4]
except:
ml = 101
mq = 30
#prefix
try:
prefix = sys.argv[5]
except:
prefix = ""
#list of files in the current directory
files = [f for f in os.listdir(".") if os.path.isfile(f)]
#files names
rr1 = r1.find(".")
suffix = r1[rr1:]
rr1 = r1[:rr1]
#rrr1 = r1.find("_")
#rrr1 = r1[:rrr1]
print rr1
#with open(r1) as myfile:
# head = [next(myfile) for x in xrange(2)]
#len_reads = str(len(head[1])-1)
#Trimming with Trimmomatic
#trimmomatic = "trimmomatic SE -phred33 %s %s ILLUMINACLIP:/usr/local/lib/Trimmomatic-0.32/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:%s MINLEN:%s" % (r1, rr1+"_paired.fastq", mq, ml)
trimmomatic = "trimmomatic SE -threads 4 -phred33 %s %s ILLUMINACLIP:/homes/ashah/install_files/Trimmomatic-0.36/adapters/TruSeq3-SE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:%s MINLEN:%s" % (r1, rr1+"_single_end.fastq", mq, ml)
print trimmomatic
#random selection of reads
random_seed=randint(1,1000000)
fastq_random_1 = "seqtk sample -s %s %s %s > %s" % (random_seed,rr1+"_single_end.fastq",sel_reads,rr1+"_single_end.fastq.subset")
#convert fastq to fasta
fastq_to_fasta = "seqtk seq -a %s > %s" % (rr1+"_single_end.fastq.subset",rr1+"_all_"+prefix+str(sel_reads)+".fasta")
#Try to run Trimmomatic
if rr1+"_single_end.fastq" not in files:
try:
print "Running Trimmomatic\n"
print trimmomatic
call(trimmomatic, shell = True)
except:
print "Trimmomatic could not run. Try again.\n"
else:
print "Trimmomatic was already run. Skipping.\n"
#Try to run fastq pe random
try:
print "Running Fastq-random"
print fastq_random_1
call(fastq_random_1, shell = True)
except:
print "Fastq-random could not run. Try again.\n"
#Try to convert to fasta format
try:
print "Running Fastq to fasta"
print fastq_to_fasta
call(fastq_to_fasta, shell = True)
except:
print "Fastq to fasta could not run. Try again.\n"
#open output
fatemp = open("%s_all_%s%s_temp.fasta" % (rr1,prefix,str(sel_reads)) ,"w")
#read modified fasta file
print "Editing "+ rr1+"_all_"+prefix+str(sel_reads)+".fastq\n"
fasta = open(rr1+"_all_"+prefix+str(sel_reads)+".fasta").readlines()
#add prefix
for x in range(0,len(fasta)):
if x%2 == 0:
line = fasta[x]
line = line.split(">")
line = ">%s%s" % (prefix,line[1])
else:
line = fasta[x]
fatemp.write(line)
fatemp.close()
call("mv %s %s" % (rr1+"_all_"+prefix+str(sel_reads)+"_temp.fasta",rr1+"_all_"+prefix+str(sel_reads)+".fasta"), shell=True)
print "\nWe\'re done!\n"