-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_orthology2file.py
executable file
·49 lines (40 loc) · 1.08 KB
/
extract_orthology2file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# *** author: william wang ***
### extract orthologous gene cluster into files ###
"""Write one FASTA file per orthologous gene cluster.

Inputs (defaults match the file names the script has always used):

* cluster file -- one cluster per line, member IDs separated by tabs; the
  first ``_KEY_LENGTH`` characters of a member ID are compared to the keys.
* FASTA file   -- all candidate records; a record is selected when a cluster
  member ID occurs anywhere in its text.
* key file     -- tab-separated keys; they define the record order of every
  output file.

For every cluster a file ``single_copy_ortholog<N>.fasta`` is written that
holds, in key order, either the matching FASTA record(s) for that key or a
bare ``>key`` placeholder when the cluster has no member for the key.
"""

# Number of leading characters of a cluster member ID that form its key.
_KEY_LENGTH = 5


def _read_keys(key_path):
    """Return the non-empty keys of a tab-separated key file."""
    with open(key_path, 'r') as handle:
        fields = handle.read().split('\t')
    # Strip each field: the last key otherwise keeps the file's trailing
    # newline and can never equal a member prefix.
    return [field.strip() for field in fields if field.strip()]


def _read_fasta_records(fasta_path):
    """Return the '>'-separated chunks of a FASTA file, without the '>'."""
    with open(fasta_path, 'r') as handle:
        chunks = handle.read().split('>')
    # Splitting on '>' yields an empty chunk before the first header.
    return [chunk for chunk in chunks if chunk.strip()]


def _cluster_records(members, keys, genes):
    """Return the output records for one cluster, ordered by key.

    members -- member IDs of the cluster.
    keys    -- keys in the desired output order.
    genes   -- FASTA records as returned by ``_read_fasta_records``.
    """
    records = []
    for key in keys:
        own_members = [m for m in members if m[:_KEY_LENGTH] == key]
        if not own_members:
            # No member carries this key: emit a header-only placeholder.
            records.append('>' + key + '\n')
            continue
        for member in own_members:
            for gene in genes:
                # NOTE(review): substring match, kept from the original
                # script; an ID like 'x_1' also matches a record that
                # contains 'x_10'. Confirm the IDs cannot collide this way.
                if member in gene:
                    # The extra newline keeps records separated even when
                    # the last record of the FASTA file lacks one.
                    records.append('>' + gene + '\n')
    return records


def main(cluster_path='pan_single_copy.I11', fasta_path='all42orf.fasta',
         key_path='key.txt', max_clusters=10,
         out_pattern='single_copy_ortholog%s.fasta'):
    """Extract up to ``max_clusters`` clusters into numbered FASTA files.

    cluster_path -- tab-separated cluster file, one cluster per line.
    fasta_path   -- FASTA file holding the candidate records.
    key_path     -- tab-separated key file.
    max_clusters -- number of clusters (non-blank lines) to process.
    out_pattern  -- ``%``-pattern for output names; receives the 1-based
                    cluster number.

    Returns the number of output files written.
    """
    keys = _read_keys(key_path)
    genes = _read_fasta_records(fasta_path)
    written = 0
    with open(cluster_path, 'r') as clusters:
        for line in clusters:
            if written >= max_clusters:
                break
            members = [m for m in line.strip().split('\t') if m]
            if not members:
                # A blank line would otherwise match every FASTA record.
                continue
            written += 1
            # Open the output once per cluster: reopening it in 'w' mode
            # for each record would truncate everything written before.
            with open(out_pattern % written, 'w') as out:
                out.writelines(_cluster_records(members, keys, genes))
    return written


if __name__ == '__main__':
    main()