-
Notifications
You must be signed in to change notification settings - Fork 2
/
tomtom.py
116 lines (109 loc) · 5.34 KB
/
tomtom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#################################################################
# @Program: tomtom.py #
# @Version: 1 #
# @Author: Chris Plaisier #
# @Sponsored by: #
# Nitin Baliga, ISB #
# Institute for Systems Biology #
# 1441 North 34th Street #
# Seattle, Washington 98103-8904 #
# (216) 732-2139 #
# @Also Sponsored by: #
# Luxembourg Systems Biology Grant #
# #
# If this program is used in your analysis please mention who #
# built it. Thanks. :-) #
# #
# Copyrighted by Chris Plaisier 12/4/2009 #
#################################################################
import os
from subprocess import *
# A class designed to compute the difference between two PSSM matrices.
#
# Variables:
#
#
# Functions:
# getScore(self,queryPssm,targetPssm)
# getAllScores(self)
#
class tomtom:
# Initialize and start the run
def __init__(self,queryPssms,targetPssms,nucFreqs,strands='+ -',distMeth='ed',qThresh=1,minOverlap=0):
# Set some parameters for analysis
self.queryPssms = queryPssms
self.targetPssms = targetPssms
self.nucFreqs = nucFreqs
self.strands = strands
self.distMeth = distMeth
self.qThresh = qThresh
self.minOverlap = minOverlap
# Setup for a tomtom run
self.makeFiles()
# Run tomtom and save data into local data structures
print 'Starting TOMTOM...'
self.runTomTom()
print 'Done.\n'
# Make the files for a TomTom run
def makeFiles(self):
if not os.path.exists('tmp'):
os.mkdir('tmp')
# Header crap
memeHeader = ''
memeHeader += 'MEME version 3.0\n\n'
memeHeader += 'ALPHABET= ACGT\n\n'
# Here is where we tell it what strand: for miRNAs this would just be '+'
memeHeader += 'strands: '+self.strands+'\n\n'
memeHeader += 'Background letter frequencies (from dataset with add-one prior applied):\n'
memeHeader += 'A '+str(round(float(self.nucFreqs['A']),3))+' C '+str(round(float(self.nucFreqs['C']),3))+' G '+str(round(float(self.nucFreqs['G']),3))+' T '+str(round(float(self.nucFreqs['T']),3))
#extras = ['R','Y','K','M','S','W','B','D','H','V','N']
#for e in extras:
# memeHeader += ' '+str(e)+' 0.000'
# Make query PSSM file
queryFile = open('tmp/query.tomtom','w')
queryFile.write(memeHeader)
for pssm in self.queryPssms:
queryFile.write('\n\n'+pssm.getMemeFormatted())
queryFile.close()
# Make target PSSM file
targetFile = open('tmp/target.tomtom','w')
targetFile.write(memeHeader)
for pssm in self.targetPssms:
targetFile.write('\n\n'+pssm.getMemeFormatted())
targetFile.close()
# Run TomTom on the files
def runTomTom(self):
if not os.path.exists('tmp/tomtom_out'):
os.mkdir('tmp/tomtom_out')
# Arguments for tomtom
tomtomArgs = ' -query tmp/query.tomtom -target tmp/target.tomtom -dist '+str(self.distMeth)+' -o tmp/tomtom_out -text -q-thresh '+str(self.qThresh)+' -min-overlap '+str(self.minOverlap)+' -verbosity 0'
print tomtomArgs
#p = Popen("tomtom" + tomtomArgs, shell=True)
#sts = os.waitpid(p.pid, 0)
errOut = open('tmp/stderr.out','w')
tomtomProc = Popen("tomtom" + tomtomArgs, shell=True,stdout=PIPE,stderr=errOut)
output = tomtomProc.communicate()[0].split('\n')
errOut.close()
# Now iterate through output and save data
output.pop(0) # Get rid of header
self.scoreMatrix = {}
while len(output)>0:
outputLine = output.pop(0).strip().split('\t')
if len(outputLine)==9:
if not outputLine[0] in self.scoreMatrix:
self.scoreMatrix[outputLine[0]] = { outputLine[1]: { 'optimalOffset':outputLine[2], 'pValue':outputLine[3], 'qValue':outputLine[4], 'overlap':outputLine[5], 'queryConsensus':outputLine[6], 'targetConsensus':outputLine[7], 'orientation':outputLine[8] } }
else:
self.scoreMatrix[outputLine[0]][outputLine[1]] = { 'optimalOffset':outputLine[2], 'pValue':outputLine[3], 'qValue':outputLine[4], 'overlap':outputLine[5], 'queryConsensus':outputLine[6], 'targetConsensus':outputLine[7], 'orientation':outputLine[8] }
# Get all pair-wise scores
def getAllScores(self):
return self.scoreMatrix
# Get a specific pair-wise ananlysis result
def getScore(self,queryPssmName,targetPssmName):
scores = self.getAllScores()
if not queryPssmName in scores:
return ['No such query pssm name!']
else:
if not targetPssmName in scores[queryPssmName]:
return ['No such target!']
else:
return scores[queryPssmName][targetPssmName]