-
Notifications
You must be signed in to change notification settings - Fork 6
/
group.py
executable file
·118 lines (110 loc) · 4.34 KB
/
group.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
''' Author : Huy Nguyen
Program : Automatically create a group dictionary for the chosen taxa and write it to a file.
Genomes are classified by class.
Start : 05/10/2016
End : 05//2016
'''
import os
import argparse
import uuid
from Bio import SeqIO
import sys
# traverse and get the file
def traverseAll(path):
    """Return the path of every file found beneath *path*.

    The tree is walked recursively with ``os.walk`` and the result keeps the
    order in which ``os.walk`` reports the files.

    Args:
        path: Root directory to search.

    Returns:
        A list with one entry per file, each formed by joining the directory
        that holds the file with the file name. The list is empty when no
        file is found.
    """
    # os.path.join avoids a doubled separator when a directory name already
    # ends with one (e.g. *path* given as "genomes/") and uses the platform
    # separator instead of a hard-coded "/".
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(path)
        for name in files
    ]
class readable_dir(argparse.Action):
    """argparse action that stores a directory path once it is readable.

    A path that is not yet a directory is created with ``os.mkdir`` first.
    If that creation fails, the problem is reported on stdout and the
    readability check that follows decides the outcome.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Check *values* and store it on *namespace* under ``self.dest``.

        Raises:
            argparse.ArgumentTypeError: if the path is not readable.
        """
        target = values
        complaint = "readable_dir:{0} is not a readable dir".format(target)

        if not os.path.isdir(target):
            try:
                os.mkdir(target)
            except OSError:
                # Only reported here; the access check below raises.
                print(argparse.ArgumentTypeError(complaint))

        if not os.access(target, os.R_OK):
            raise argparse.ArgumentTypeError(complaint)
        setattr(namespace, self.dest, target)
def get_arguments():
    """Parse the command line and return the resulting namespace.

    Recognised options:
        --InputGenBackDirectory / -i: processed by the ``readable_dir`` action.
        --OutputFile / -o: where the program's output is written.
        --AccessionNumber / -a: file of accession names.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    option_table = (
        (
            ("--InputGenBackDirectory", "-i"),
            {"action": readable_dir, "help": "Genbank directory"},
        ),
        (
            ("--OutputFile", "-o"),
            {"help": "Output of this program will be stored in the path supplied here. It will make a new directory if path given is valid or it will raise an error"},
        ),
        (
            ("--AccessionNumber", "-a"),
            {"help": "Accession name (phylo_order.txt)"},
        ),
    )
    cli = argparse.ArgumentParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
def chk_output_directory_path(OutputDirectory, sessionID):
    """Report whether the session-specific output path is still unused.

    The path tested is ``OutputDirectory + "_" + str(sessionID)``. Nothing is
    created on disk; the function only checks for existence.

    Args:
        OutputDirectory: Base output path given by the user (a string).
        sessionID: Identifier appended to the base path; converted with
            ``str()``.

    Returns:
        True when nothing exists at the session path, False otherwise.
    """
    # The former try/except OSError guarded only a commented-out mkdir, so
    # the handler could never run; the check is now a plain boolean.
    session_path = OutputDirectory + "_" + str(sessionID)
    return not os.path.exists(session_path)
def parse_accession(myFile):
    """Read accession numbers from a text file, one per line.

    Surrounding whitespace is removed from every line and lines that are
    empty after stripping are skipped, so a trailing blank line or stray
    spaces cannot produce entries that never match an accession.

    Args:
        myFile: Path of the accession list file.

    Returns:
        A list of accession strings in file order.
    """
    # The context manager closes the file even if reading fails part-way.
    with open(myFile, 'r') as handle:
        stripped = (line.strip() for line in handle)
        return [entry for entry in stripped if entry]
if __name__ == "__main__":
    # Script entry point: read the first record of every file under the
    # input directory, group the genomes by one taxonomy rank, give each
    # group a colour and write "name:colour" lines to the output file.
    args = get_arguments()
    sessionID = uuid.uuid1()
    condition = chk_output_directory_path(args.OutputFile, sessionID)
    # myclass[i] holds every distinct name seen at depth i of the taxonomy
    # lineage; deeper ranks are added on demand while reading the records.
    myclass = {0: set(), 1: set(), 2: set(), 3: set(),
               4: set(), 5: set(), 6: set(), 7: set()}
    class_dic = {}  # key is accession number, value is its taxonomy lineage
    # colours available for SVG_Color of ete3
    color_list = ['hotpink', 'deepskyblue', 'black', 'brown', 'yellow',
                  'magenta', 'purple', 'green', 'mediumblue', 'silver']
    color_dic = {}
    if condition:
        # The option may be omitted (None) or passed as the literal string
        # "None"; both mean that no accession filter is applied.
        if args.AccessionNumber is None or args.AccessionNumber == "None":
            accession = None
        else:
            # A set gives constant-time membership tests in the loop below.
            accession = set(parse_accession(args.AccessionNumber))
        outputsession = args.OutputFile
        res = traverseAll(args.InputGenBackDirectory)
        for r in res:
            print(r)
            # Only the first record of each file is used; a file that
            # yields no record is skipped instead of raising StopIteration.
            first_rec = next(SeqIO.parse(r, "genbank"), None)
            if first_rec is None:
                continue
            accession_num = first_rec.annotations['accessions'][0]
            # keep only the accessions listed in the accession file, if any
            if accession and accession_num not in accession:  # NC_000964.gbk
                continue
            print(accession_num)
            taxonomy = first_rec.annotations['taxonomy']
            for index, item in enumerate(taxonomy):
                # setdefault accepts lineages deeper than the 8 ranks that
                # are pre-created above.
                myclass.setdefault(index, set()).add(item)
            class_dic[accession_num] = taxonomy
        # Pick the first rank with at least 4 distinct names; when no rank
        # qualifies the loop ends on the deepest rank.
        for group in sorted(myclass):
            if len(myclass[group]) >= 4:
                break
        print(myclass)
        # Sorting makes the colour assignment reproducible between runs, and
        # the modulo reuses the palette when there are more groups than
        # colours instead of failing on an exhausted list.
        for index, item in enumerate(sorted(myclass[group])):
            color_dic[item] = color_list[index % len(color_list)]
        print(color_dic)
        # give every accession the colour of its group
        for key in class_dic:
            lineage = class_dic[key]
            if group < len(lineage):
                color_dic[key] = color_dic[lineage[group]]
            else:
                # This lineage does not reach the chosen rank, so there is
                # no group to take a colour from.
                print("No taxonomy entry at rank {0} for {1}; left uncoloured"
                      .format(group, key), file=sys.stderr)
        # writing to outfile
        with open(outputsession, 'w') as outfile:
            for key in color_dic:
                outfile.write(key + ':' + color_dic[key] + '\n')