-
Notifications
You must be signed in to change notification settings - Fork 0
/
process.sh
executable file
·293 lines (262 loc) · 8.22 KB
/
process.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
#!/usr/bin/env bash
###############################################################
### XPROCESS: process version 240705 ##
### (C) 2024 Adams Vallejos ##
###############################################################
TITLE=""
PROJECT=""
PROTEIN=""
CELL="${1}"
SPACEGROUP="${2}"
POINTGROUP="${3}"
SAMPLENAME="${4}"
COORDINATES="${5}"
CIFLIB="${6}"
CELLWORK="${7}"
GRIDWORK="${8}"
XYZLIM="${9}"
OUT="$(dirname $SAMPLENAME)"
RFREE=$(dirname $(dirname $(dirname $OUT)))/rfree.mtz
NPROC=10
NCYCLE=20
PARAMETERS="phenix_por.params"
W="\033[0m"
R="\033[31m"
G="\033[32m"
# Y="\033[33m"
# # --- TEST SECTION
# # --- CRYSTFEL SECTION ###################################################
# # --- MERGING REFLECTIONS ---
# # INFO: Merge reflections by scaling and post refinement.
PARTIALATOR=1
# If partialator is true, run process_hkl with split
SPLIT=$((PARTIALATOR^1))
FOM=1
OUT+="/merge"
if [ ${PARTIALATOR} = 1 ]
then
# # --- Run PARTIALATOR ---
# # example: https://www.desy.de/~twhite/crystfel/manual-partialator.html
printf "%s Merging intensities with partialator\n" "$(date +"%y-%m-%d %H:%M")"
if ! partialator \
--input="${SAMPLENAME}" \
--output="${OUT}".hkl \
--symmetry="${POINTGROUP}" \
--iterations=3 \
--model=unity \
-j ${NPROC} \
> "${OUT}".hkl.log 2>&1
then
printf "%bPartialator failed!%b\n" "${R}" "${W}"
exit 1
fi
else
# # --- Run PROCESS_HKL ---
printf "%s Merging intensities with process_hkl\n" "$(date +"%y-%m-%d %H:%M")"
if ! process_hkl \
--input="${SAMPLENAME}" \
--output="${OUT}".hkl \
--scale \
--min-res=3 \
--symmetry="${POINTGROUP}" \
> "${OUT}".hkl.log 2>&1
then
printf "%bprocess_hkl failed!%b\n" "${R}" "${W}"
exit 1
fi
fi
if [ ${SPLIT} = 1 ]
then
# # --- Split half datasets for figures of merit_
# # --- Odd only
# printf "%s Spliting reflections: Odd only\n" "$(date +"%y-%m-%d %H:%M")"
printf "%s Spliting reflections\n" "$(date +"%y-%m-%d %H:%M")"
if ! process_hkl \
--input="${SAMPLENAME}" \
--output="${OUT}".hkl1 \
--odd-only \
--scale \
--min-res=3 \
--max-adu=18500 \
--symmetry="${POINTGROUP}" \
> "${OUT}".hkl1.log 2>&1
then
printf "%bOdd splitting failed!%b\n" "${R}" "${W}"
exit 1
fi
# # --- Even only
# printf "%s Spliting reflections: Even only\n" "$(date +"%y-%m-%d %H:%M")"
if ! process_hkl \
--input="${SAMPLENAME}" \
--output="${OUT}".hkl2 \
--even-only \
--scale \
--min-res=3 \
--max-adu=18500 \
--symmetry="${POINTGROUP}" \
> "${OUT}".hkl2.log 2>&1
then
printf "%bEven slitting failed!%b\n" "${R}" "${W}"
exit 1
fi
else
printf "%s Skipping reflection splitting.\n" "$(date +"%y-%m-%d %H:%M")"
fi
if [ ${FOM} = 1 ]
then
# # --- Compute FOM5n
printf "%s Computing figures of merit\n" "$(date +"%y-%m-%d %H:%M")"
if ! check_hkl "${OUT}".hkl \
-y "${POINTGROUP}" \
-p "${COORDINATES}" \
--shell-file="${OUT}"_check.dat \
--nshells=20 \
> "${OUT}"_check.log 2>&1
then
printf "%bcheck_hkl failed!%b\n" "${R}" "${W}"
exit 1
fi
# printf "%s Computing Rsplit\n" "$(date +"%y-%m-%d %H:%M")"
# Options: R1I, R1F, R2, Rsplit, CC, CCstar, CCano, CRDano, Rano,
# Rano/Rsplit, d1sig, d2sig
for fom in 'ccstar' 'rsplit';
# for fom in {'CChalf', 'ccstar', 'rsplit'};
do
# printf "%s Computing CC*\n" "$(date +"%y-%m-%d %H:%M")"
if ! compare_hkl "${OUT}".hkl1 "${OUT}".hkl2 \
-y "${POINTGROUP}" \
-p "${COORDINATES}" \
--fom="$fom" \
--shell-file="${OUT}"_"$fom".dat \
--nshells=20 \
> "${OUT}"_"$fom".log 2>&1
then
printf "%bcompare_hkl failed!%b\n" "${R}" "${W}"
exit 1
fi
printf "%s\n" "$(grep 'Overall' "${OUT}"_"$fom".log)"
done
# printf "%s\n" "$(grep 'Overall' "${OUT}"_"$fom".log)"
fi
# # --- Remove stream file, resampled crystals logged in yaml file
if ! rm "${SAMPLENAME}"
then
printf "%bSample stream could not be removed%b\n" "${R}" "${W}"
exit 1
fi
# # --- CCP4/PHENIX SECTION ################################################
# # --- Convert HKL to MTZ ------------------------------------------------
# # INFO: Convert a formatted reflection file to MTZ format.
# # Based on CrystFEL's 'create-mtz' , deprecated from version 0.10.1
# # example: ccp4/8.0/examples/unix/runnable/f2mtz.exam
printf "%s Writing mtz\n" "$(date +"%y-%m-%d %H:%M")"
sed -n '/End\ of\ reflections/q;p' "${OUT}".hkl > "${OUT}".temp.hkl
# get_hkl -i file.hkl -p my.cell -o out.mtz --output-format=mtz
if ! f2mtz \
HKLIN "${OUT}".temp.hkl \
HKLOUT "${OUT}".mtz \
<<-ENDf2mtz > "${OUT}".log
TITLE ${TITLE}
NAME PROJECT ${PROJECT} CRYSTAL ${PROTEIN} DATASET $(basename "${SAMPLENAME}")
CELL ${CELL}
SYMM ${SPACEGROUP}
SKIP 3
LABOUT H K L I SIGI
CTYPE H H H J Q
FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'
END
ENDf2mtz
then
printf "%bFailed to write mtz!%b\n" "${R}" "${W}"
exit 1
fi
rm "${OUT}".temp.hkl
# # FIRST TIME DO uniqueify {-p fraction} sample_0.mtz sample_0_rFree.mtz
# # ^ default fraction is 0.05
if [ ! -f "$RFREE" ]
then
printf "%s Generating FreeR_flag column\n" "$(date +"%y-%m-%d %H:%M")"
if ! uniqueify -p 0.05 "${OUT}".mtz "${RFREE}"
then
printf "%bFailed to generate FreeR_flag column!%b\n" "${R}" "${W}"
exit 1
fi
if ! mv -f "$(basename "$RFREE" .mtz)".log "$(dirname "${RFREE}")"/.
then
printf "%bFailed to move log file!%b\n" "${R}" "${W}"
fi
fi
# # --- run CTRUNCATE ---
# # INFO: Intensity to amplitude conversion and data statistics.
# # example: ccp4/8.0/examples/unix/runnable/ctruncate.exam
printf "%s Converting intensities\n" "$(date +"%y-%m-%d %H:%M")"
IN=${OUT}
OUT+="_trunc"
if ! ctruncate -stdin \
<<-ENDctruncate > ${OUT}.log
hklin ${IN}.mtz
hklout ${OUT}.mtz
colin /*/*/[I,SIGI]
ENDctruncate
then
printf "%bFailed to convert intensities!%b\n" "${R}" "${W}"
exit 1
fi
# # --- Run CAD ---
# # INFO: Collect and sort crystallographic reflection data from several files,
# # ^ to generate a single set.
# # example: ccp4/8.0/examples/unix/runnable/cad.exam
printf "%s Combining amplitudes\n" "$(date +"%y-%m-%d %H:%M")"
IN=${OUT}
OUT+="_cad"
if ! cad \
hklin1 ${IN}.mtz \
hklin2 "${RFREE}" \
hklout ${OUT}.mtz \
<<-ENDcad > ${OUT}.log
TITLE ${TITLE}
LABIN FILE 1 ALLIn
LABIN FILE 2 E1=FreeR_flag
END
ENDcad
then
printf "%bFailed to combine amplitudes!%b\n" "${R}" "${W}"
exit 1
fi
# # --- Run UNIQUEIFY ---
# # INFO: Generate a unique list of reflections
# # example: ccp4/8.0/examples/unix/runnable/unique-free-R
printf "%s Listing unique reflections\n" "$(date +"%y-%m-%d %H:%M")"
IN=${OUT}
OUT+="_uniq"
if ! uniqueify -f FreeR_flag ${IN}.mtz ${OUT}.mtz
then
printf "%bFailed to list unique reflections!%b\n" "${R}" "${W}"
exit 1
fi
mv "$(basename ${OUT}.mtz .mtz)".log "$(dirname "${OUT}")"/.
# # --- a. partial occupancy refinement ---
# # INFO: A
printf "%s Refinement partial occupancy\n" "$(date +"%y-%m-%d %H:%M")"
IN=${OUT}
# OUT+="_por"
HKLIN=${IN}.mtz
# echo Refining light with "$OCCUPANCY" partial occupancy
phenix.refine \
pdb.file_name="${COORDINATES}" \
xray_data.file_name="${HKLIN}" \
xray_data.labels="F,SIGF" \
output.prefix="${OUT}" \
map.file_name="${OUT}".ccp4 \
"$PARAMETERS" \
--quiet --overwrite
OUT+="_001"
R_FACTORS=$(grep 'R-work' ${OUT}.log|tail -n 1 |xargs)
if [[ "${R_FACTORS:17:4}" -lt 2500 ]]
then
printf "%s %b%s%b\n" "$(date +"%y-%m-%d %H:%M")" "${G}" "${R_FACTORS}" "${W}"
else
printf "%s %b%s%b\n" "$(date +"%y-%m-%d %H:%M")" "${R}" "${R_FACTORS}" "${W}"
fi
# # CONVERT HETATM entries to ATOM entries
sed 's/HETATM/ATOM /' ${OUT}.pdb > ${OUT}_hetatm2atom.pdb