-
Notifications
You must be signed in to change notification settings - Fork 9
/
12an.py
81 lines (65 loc) · 1.64 KB
/
12an.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
### 12an.py - script to assign an identifier to each unique (non-redundant) line and report its redundancy count - /Users/vgupta/Desktop/script/python
import os,sys,getopt, re
### Usage
'''
python ~/Desktop/script/python/12an.py -f mature_query.txt -i LjMiRCan
'''
### parse the command-line arguments
def options(argv):
    """Parse the command-line arguments.

    Recognised options:
        -h                      print the usage text and exit
        -f / --file <file>      input file
        -i / --identifier <id>  identifier prefix

    argv is the argument list without the program name (sys.argv[1:]).

    Returns a (file, identifier) tuple; an option that was not supplied
    is returned as an empty string.

    Exits with status 2 (after printing the usage text) when getopt
    rejects the arguments, and with status 0 after handling -h.
    """
    # One usage text for both the error path and -h. The -h text used to
    # advertise -p / -s, which are not options this parser accepts.
    usage = '''
python 12an.py
-f <file>
-i <identifier>
'''
    path = ''
    identifier = ''
    try:
        opts, args = getopt.getopt(argv, "hf:i:", ["file=", "identifier="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-f", "--file"):
            path = arg
        elif opt in ("-i", "--identifier"):
            identifier = arg
    return path, identifier
def hash_lines(path=None, prefix=None):
    """Count how often each line occurs and give each distinct line an ID.

    The first line of the file is skipped (it is treated as a header).
    Every other line is stripped of surrounding whitespace before it is
    counted.

    path   -- file to read; defaults to the module-level ``file``.
    prefix -- identifier prefix; defaults to the module-level ``identifier``.

    Returns a (counts, ids) tuple of dicts keyed by the stripped line:
    counts[line] is the number of occurrences, ids[line] is
    '<prefix>_NNN' with NNN a 1-based number zero-padded to 3 digits.
    Numbers are handed out in order of first appearance in the file.
    """
    # Fall back to the globals set by the __main__ block so that existing
    # zero-argument calls keep working.
    if path is None:
        path = file
    if prefix is None:
        prefix = identifier

    counts = {}
    order = []  # distinct lines in first-seen order; plain dicts are unordered on Python 2
    # 'with' closes the handle; it was previously left open.
    with open(path, 'r') as handle:
        for line_number, line in enumerate(handle):
            if line_number == 0:
                continue  # header row
            line = line.strip()
            if line in counts:
                counts[line] += 1
            else:
                counts[line] = 1
                order.append(line)

    ids = {}
    for number, key in enumerate(order, 1):
        ids[key] = prefix + '_' + str(number).zfill(3)
    return counts, ids
def process(path=None, ids=None, counts=None):
    """Print every line of the file with its identifier and redundancy.

    For the first line of the file a fixed header
    ('Sequence', 'identifier', 'Redundancy') is printed in its place.
    Each following line is stripped and printed, tab-separated, together
    with ids[line] and counts[line]. A repeated line is printed once per
    occurrence.

    path   -- file to read; defaults to the module-level ``file``.
    ids    -- line -> identifier mapping; defaults to the module-level
              ``hash_new``.
    counts -- line -> occurrence-count mapping; defaults to the
              module-level ``hash``.

    Raises KeyError if a line is missing from either mapping.
    """
    # Fall back to the globals set by the __main__ block so that existing
    # zero-argument calls keep working.
    if path is None:
        path = file
    if ids is None:
        ids = hash_new
    if counts is None:
        counts = hash

    # 'with' closes the handle; it was previously left open.
    with open(path, 'r') as handle:
        for line_number, line in enumerate(handle):
            if line_number == 0:
                # The input's own header row is replaced, not echoed.
                print('Sequence' + '\t' + 'identifier' + '\t' + 'Redundancy')
                continue
            line = line.strip()
            print(line + '\t' + str(ids[line]) + '\t' + str(counts[line]))
if __name__ == "__main__":
    # file, identifier, hash and hash_new are deliberately module-level
    # names: hash_lines() and process() read them as globals.
    file, identifier = options(sys.argv[1:])
    # options() returns '' when -f is absent; stop here with a usage error
    # instead of letting open('') fail with a traceback.
    if not file:
        sys.stderr.write('error: no input file given\n'
                         'usage: python 12an.py -f <file> -i <identifier>\n')
        sys.exit(2)
    ### add identifier
    hash, hash_new = hash_lines()
    ### print identifier
    process()