forked from kkrizanovic/RNAseqEval
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathadjustFqHeaders.py
executable file
·78 lines (61 loc) · 2.39 KB
/
adjustFqHeaders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#! /usr/bin/python
import sys, os
# To enable importing from samscripts submodule
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(SCRIPT_PATH, 'samscripts/src'))
import utility_sam
import Annotation_formats
from fastqparser import read_fastq
def adjustFqHeaders(fastqfile, findStr, replaceStr):
    """Replace a leading string in every header of a FASTA/FASTQ file, in place.

    The file is read with ``read_fastq``, every header that starts with
    ``findStr`` gets that prefix swapped for ``replaceStr``, and the records
    are written back to the same path. The output format is chosen from the
    file extension (case-insensitive):

    * ``.fa``, ``.fna``, ``.faa``, ``.fasta`` -> FASTA (``>header`` / sequence)
    * ``.fq``, ``.fastq`` -> FASTQ (``@header`` / sequence / ``+header`` / quality)

    Args:
        fastqfile: Path of the file to rewrite.
        findStr: Prefix to look for at the very start of each header.
        replaceStr: Text that replaces ``findStr`` in matching headers.

    Returns:
        A ``(replaced, notreplaced)`` tuple with the number of headers that
        were changed and the number that were left untouched.

    Raises:
        Exception: If the file extension is not one of the supported ones.
            This is raised before the file is opened for writing, so the
            input is left intact.
        IndexError: If ``read_fastq`` returned fewer sequences (or, for FASTQ
            output, fewer qualities) than headers.
    """
    fasta_extensions = ('.fa', '.fna', '.faa', '.fasta')
    fastq_extensions = ('.fq', '.fastq')

    # Reading fastq file
    [headers, seqs, quals] = read_fastq(fastqfile)

    # Settle the output format BEFORE opening the file with mode 'w':
    # opening truncates the file, so failing later would destroy the input.
    file_extension = os.path.splitext(fastqfile)[1]
    extension = file_extension.lower()
    if extension in fastq_extensions:
        write_fastq = True
    elif extension in fasta_extensions:
        write_fastq = False
    else:
        raise Exception('Invalid file extension: %s' % file_extension)

    # zip() below would silently drop records if the lists were uneven, so
    # refuse to rewrite the file in that case instead of losing data.
    totalSeqs = len(headers)
    if len(seqs) < totalSeqs or (write_fastq and len(quals) < totalSeqs):
        raise IndexError('Fewer sequences/qualities than headers in %s' % fastqfile)

    findLen = len(findStr)
    replaced = 0
    notreplaced = 0
    for i, header in enumerate(headers):
        if header.startswith(findStr):
            headers[i] = replaceStr + header[findLen:]
            replaced += 1
        else:
            notreplaced += 1

    with open(fastqfile, 'w') as ffile:
        if write_fastq:
            for header, seq, qual in zip(headers, seqs, quals):
                # The header is repeated on the '+' separator line.
                ffile.write('@%s\n%s\n+%s\n%s\n' % (header, seq, header, qual))
        else:
            for header, seq in zip(headers, seqs):
                ffile.write('>%s\n%s\n' % (header, seq))

    return replaced, notreplaced
def verbose_usage_and_exit():
    """Print the command-line usage text to stderr and terminate the process.

    Raises:
        SystemExit: Always, with exit status 0 (kept for backward
            compatibility with existing callers of the script).
    """
    usage_lines = [
        'adjustFqHeaders - Replace a string at the beginning of each fastq header\n',
        '\n',
        'Usage:\n',
        '\t%s [Fastq/Fasta file] [find string] [replace string]\n' % sys.argv[0],
        '\n',
    ]
    sys.stderr.write(''.join(usage_lines))
    # sys.exit is used rather than the bare exit() helper: exit() is injected
    # by the site module for interactive use and may be missing in scripts.
    sys.exit(0)
if __name__ == '__main__':
    # The script takes exactly three arguments: the file to rewrite, the
    # string to find and the string to put in its place.
    if len(sys.argv) != 4:
        verbose_usage_and_exit()

    fastqfile, findStr, replaceStr = sys.argv[1:4]
    replaced, notreplaced = adjustFqHeaders(fastqfile, findStr, replaceStr)

    # Each message carries its own trailing newline on top of the one print
    # adds, so the report is spaced out with blank lines.
    print('\nStatistics:\n')
    print('Adjusted headers: %d\n' % replaced)
    print('Non adjusted headers: %d\n' % notreplaced)