-
Notifications
You must be signed in to change notification settings - Fork 6
/
mfold_quik
executable file
·212 lines (189 loc) · 4.41 KB
/
mfold_quik
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/bin/bash
# This shell script folds all RNA or DNA sequences and creates output files.
export _POSIX2_Version=0
export Package=mfold
export Version=3.7
# Abort subroutine
abort(){
rm -f mfold.log fort.*
if [ $# -gt 0 ] ; then
echo -e "$1"
fi
echo "Job Aborted"
exit 1
}
if [ $# = 0 ]; then
echo -e "Usage is\nmfold SEQ='file_name' with optional parameters:
[ AUX='auxfile_name' ]
[ NA=RNA (default) or DNA ] [ LC=sequence type (default = linear) ]
[ T=temperature (default = 37°) ] [ P=percent (default = 5) ]
[ NA_CONC=Na+ molar concentration (default = 1.0) ]
[ MG_CONC=Mg++ molar concentration (default = 0.0) ]
[ W=window parameter (default - set by sequence length) ]
[ MAXBP=max base pair distance (default - no limit) ]
[ MAX=maximum number of foldings to be computed per sequence (default 50) ]"
exit 2
fi
# Set default values.
SEQ=/dev/null
NA=DNA
LC=linear
T=default
P=5
W=-1
MAXBP="no limit"
MAX=50
START=1
STOP=30000
# Process the command line arguments 1 at a time.
COUNT=$#
while [ $COUNT != 0 ]; do
if [ `echo $1 | cut -d= -f1` = "SEQ" ]; then
SEQ=`echo $1 | cut -d= -f2`
elif [ `echo $1 | cut -d= -f1` = "AUX" ]; then
AUX=`echo $1 | cut -d= -f2`
fi
COUNT=`expr $COUNT - 1`
shift
done
NA_CONC=${NA_CONC:-1.0}
MG_CONC=${MG_CONC:-0.0}
# Check for sequence
if [ ! -s $SEQ ]; then
echo "Sequence has not been defined or it does not exist."
exit 1
fi
NUM=`echo $SEQ | tr "." " " | wc | tr -s " " " " | cut -d" " -f3`
SUFFIX=`echo $SEQ | cut -d"." -f$NUM`
if [ $NUM -gt 1 -a $SUFFIX = seq ]; then
NUM=`expr $NUM - 1`
FILE_PREFIX=`echo $SEQ | cut -d"." -f1-$NUM`
else
FILE_PREFIX=$SEQ
fi
# SEQ is redefined to local version but the FILE_PREFIX does not
# contain "local" in it.
FILE_PREFIX=`reformat-seq.sh $SEQ`
SEQ=${FILE_PREFIX}-local.seq
LOGFILE=$FILE_PREFIX.log
rm -f $LOGFILE
# Create con file from aux file (for constrained folding)
AUX=${AUX:-$FILE_PREFIX.aux}
rm -f $FILE_PREFIX.con
# Maximum range for base pairs
if [ "$MAXBP" != "no limit" ]; then
echo -e "9\n" $MAXBP|tr -d " " >> $FILE_PREFIX.con
fi
if [ -s $AUX ]; then
# Single force
grep F $AUX|tr "O" "0"|grep " 0 "|sed 's/F/2+/g'|\
sed 's/ 0 / /g'|tr "+" "\012" >> $FILE_PREFIX.con
# Double force
grep F $AUX|tr "O" "0"|grep -v " 0 "|sed 's/F/3+/g'|\
tr "+" "\012" >> $FILE_PREFIX.con
# Single prohibit
grep P $AUX|tr "O" "0"|grep " 0 "|sed 's/P/6+/g'|\
sed 's/ 0 / /g'|tr "+" "\012" >> $FILE_PREFIX.con
# Double prohibit
grep P $AUX|tr "O" "0"|grep -v " 0 "|grep -v "-"|sed 's/P/7+/g'|\
tr "+" "\012" >> $FILE_PREFIX.con
# Range prohibit
grep P $AUX|grep "[0-9]-[0-9]"|tr "-" " "|sed 's/P/8+/g'|\
tr "+" "\012" >> $FILE_PREFIX.con
fi
# Write out sequence using 'auxgen'
auxgen ${FILE_PREFIX}-local > $LOGFILE 2>&1 || abort "auxgen failed"
mv ${FILE_PREFIX}-local.pnt $FILE_PREFIX.pnt
SEQ_NAME=`head -n1 $FILE_PREFIX.pnt|cut -c1-72|tr -s " " " "|sed 's/^ //'|sed 's/^>//'`
LENGTH=`grep '^#BASES= ' $FILE_PREFIX.pnt|awk '{printf "%d",$2}'`
if [ $STOP -gt $LENGTH ]; then
STOP=$LENGTH
fi
if [ $START -lt 1 ]; then
START=1
fi
LENGTH=`expr $STOP - $START + 1`
# Generate energy tables for temperature T
# EXT is extension for energy file names
# T=default means that T was not specified on the command line.
# This yields version 3.0 energies at 37 degrees (.dat) for RNA
E_Version=2.3
if [ $T = default ]; then
T=37
if [ $NA = RNA ]; then
EXT=dat
E_Version=3.0
else
EXT=37
fi
else
if [ $T -lt 0 ]; then
T=0
elif [ $T -gt 100 ]; then
T=100
fi
EXT=$T
fi
ARG1=`echo $LC | cut -c1`
newtemp >> $LOGFILE 2>&1 <<-EOF || abort "newtemp failed"
$NA
$T
$NA_CONC
$MG_CONC
EOF
# Fold the sequences
# First create a command file.
cat > $FILE_PREFIX.cmd <<-EOF
1
$FILE_PREFIX.sav
$SEQ
1
$START
$STOP
asint1x2.$EXT
asint2x3.$EXT
dangle.$EXT
loop.$EXT
miscloop.$EXT
sint2.$EXT
sint4.$EXT
sint6.$EXT
stack.$EXT
tloop.$EXT
triloop.$EXT
tstackh.$EXT
tstacki.$EXT
1
7
30
8
30
EOF
# Add constraints, if any.
if [ -s $FILE_PREFIX.con ]; then
cat $FILE_PREFIX.con >> $FILE_PREFIX.cmd
fi
# Finish nafold command file.
echo "10" >> $FILE_PREFIX.cmd
nafold $ARG1 text < $FILE_PREFIX.cmd >>$LOGFILE 2>&1 || abort "Fill run failed"
if [ ! -s $FILE_PREFIX.sav ] ; then
abort "Save file is empty. No foldings."
fi
OUT=out
DET=det
nafold $ARG1 text >>$LOGFILE 2>&1 <<EOF || abort "Structures not computed"
2
1
$P
$MAX
$W
$FILE_PREFIX.sav
n
$FILE_PREFIX.$OUT
y
$FILE_PREFIX.ct
y
$FILE_PREFIX.$DET
EOF
# Cleanup
rm -f mfold.log fort.* *.$EXT