-
Notifications
You must be signed in to change notification settings - Fork 5
/
shift.wdl
489 lines (451 loc) · 20.5 KB
/
shift.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
version development
## Copyright (c) 2021-2024 Giulio Genovese
##
## Version 2024-09-27
##
## Contact Giulio Genovese <[email protected]>
##
## This WDL workflow runs allelic shift imbalance analysis in a given region
##
## Cromwell version support
## - Successfully tested on v86
##
## Distributed under terms of the MIT License
# Reference genome resources and per-chromosome coordinates used by the shift workflow.
# The len, pcen and qcen arrays are indexed by (chromosome number - 1) when the workflow
# builds p-arm ("<chr>:1-<pcen>") and q-arm ("<chr>:<qcen>-<len>") regions.
struct Reference {
# reference genome FASTA and its .fai index
File fasta
File fasta_fai
# number standing in for chromosome X; kept as a String because the workflow passes it
# as the replacement argument of sub() when converting "X" into an array index
String n_x_chr
# optional cytoband file forwarded to the assoc_plot task
File? cyto_file
# prefix prepended to chromosome names when building region strings ("chr" for GRCh38, "" for GRCh37)
String? chr_prefix
# per-chromosome values: end coordinate of the q-arm region, end of the p-arm region, start of the q-arm region
Array[Int] len
Array[Int] pcen
Array[Int] qcen
File? gff3_file # http://ftp.ensembl.org/pub/current_gff3/homo_sapiens/
File? rsid_vcf_file # http://ftp.ncbi.nlm.nih.gov/snp/latest_release/VCF/
# index file accompanying rsid_vcf_file
File? rsid_vcf_idx
}
# Workflow shift
#
# Steps, as implemented below:
#   1. build a Reference struct from ref_name (GRCh38/GRCh37 defaults) and the optional overrides
#   2. read the batch table impute_tsv_file (header row + one row per batch) into a column map
#   3. list the phenotype column names of pheno_tsv_file (task lst_header)
#   4. keep the phenotypes whose name, up to the first "_", is a chromosome (digits, X or Y)
#      optionally followed by an arm (p or q), and derive the region to analyze for each
#   5. for every (batch, chromosome) pair run vcf_summary over all phenotypes on that chromosome
#   6. for every kept phenotype merge the per-batch summaries (task vcf_merge) and,
#      if plot is true and vcf_merge reports n > 0, draw the association plot (task assoc_plot)
#
# Outputs: one GWAS-VCF (and index) per kept phenotype, plus the PNG plots when plot is true.
workflow shift {
  input {
    String sample_set_id
    File? keep_samples_file
    File? remove_samples_file
    File pheno_tsv_file
    String as_id = "AS"
    String ext_string = "as"
    String? pop
    String ref_name = "GRCh38"
    String? ref_fasta
    String? ref_fasta_fai
    String? ref_path
    String? chr_prefix
    Int? n_x_chr
    String? cyto_file
    String? gff3_file
    String? rsid_vcf_file
    String? rsid_vcf_idx
    File impute_tsv_file # batch_id path chr1_imp_vcf chr1_imp_vcf_index chr2_imp_vcf chr2_imp_vcf_index ...
    String? impute_data_path
    Boolean fisher_exact = true
    Boolean drop_genotypes = true
    Boolean phred_score = true
    Boolean plot = true
    String basic_bash_docker = "debian:stable-slim"
    String docker_repository = "us.gcr.io/mccarroll-mocha"
    String bcftools_docker = "bcftools:1.20-20240927"
    String r_mocha_docker = "r_mocha:1.20-20240927"
  }

  # append a trailing "/" to the docker repository and the reference path unless empty or already present
  String docker_repository_with_sep = docker_repository + if docker_repository != "" && docker_repository == sub(docker_repository, "/$", "") then "/" else ""
  String ref_path_with_sep = select_first([ref_path, ""]) + if defined(ref_path) && select_first([ref_path]) == sub(select_first([ref_path]), "/$", "") then "/" else ""

  # explicit inputs take precedence; otherwise defaults are chosen from ref_name (GRCh38 or GRCh37)
  Reference ref = object {
    fasta: if defined(ref_fasta) then ref_path_with_sep + select_first([ref_fasta]) else if ref_name == "GRCh38" then ref_path_with_sep + "GCA_000001405.15_GRCh38_no_alt_analysis_set.fna" else if ref_name == "GRCh37" then ref_path_with_sep + "human_g1k_v37.fasta" else None,
    fasta_fai: if defined(ref_fasta_fai) then ref_path_with_sep + select_first([ref_fasta_fai]) else if ref_name == "GRCh38" then ref_path_with_sep + "GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai" else if ref_name == "GRCh37" then ref_path_with_sep + "human_g1k_v37.fasta.fai" else None,
    n_x_chr: select_first([n_x_chr, 23]),
    cyto_file: if defined(ref_path) || defined(cyto_file) then ref_path_with_sep + select_first([cyto_file, "cytoBand.txt.gz"]) else None,
    chr_prefix: if ref_name == "GRCh38" then "chr" else if ref_name == "GRCh37" then "" else chr_prefix,
    len: if ref_name == "GRCh38" then
      [248956422, 242193529, 198295559, 190214555, 181538259, 170805979, 159345973, 145138636, 138394717, 133797422, 135086622, 133275309, 114364328, 107043718, 101991189, 90338345, 83257441, 80373285, 58617616, 64444167, 46709983, 50818468, 156040895]
    else if ref_name == "GRCh37" then
      [249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 48129895, 51304566, 155270560]
    else None,
    pcen: if ref_name == "GRCh38" then
      [122026459, 92188145, 90772458, 49712061, 46485900, 58553888, 58169653, 44033744, 43389635, 39686682, 51078348, 34769407, 16000000, 16000000, 17083673, 36311158, 22813679, 15460899, 24498980, 26436232, 10864560, 12954788, 58605579]
    else if ref_name == "GRCh37" then
      [121535434, 92326171, 90504854, 49660117, 46405641, 58830166, 58054331, 43838887, 47367679, 39254935, 51644205, 34856694, 16000000, 16000000, 17000000, 35335801, 22263006, 15460898, 24681782, 26369569, 11288129, 13000000, 58632012]
    else None,
    qcen: if ref_name == "GRCh38" then
      [124932724, 94090557, 93655574, 51743951, 50059807, 59829934, 61528020, 45877265, 45518558, 41593521, 54425074, 37185252, 18051248, 18173523, 19725254, 38265669, 26616164, 20861206, 27190874, 30038348, 12915808, 15054318, 62412542]
    else if ref_name == "GRCh37" then
      [124535434, 95326171, 93504854, 52660117, 49405641, 61830166, 61054331, 46838887, 50367679, 42254935, 54644205, 37856694, 19000000, 19000000, 20000000, 38335801, 25263006, 18460898, 27681782, 29369569, 14288129, 16000000, 61632012]
    else None,
    gff3_file: if defined(gff3_file) then ref_path_with_sep + select_first([gff3_file]) else None,
    rsid_vcf_file: if defined(rsid_vcf_file) then ref_path_with_sep + select_first([rsid_vcf_file]) else None,
    rsid_vcf_idx: if defined(rsid_vcf_idx) then ref_path_with_sep + select_first([rsid_vcf_idx]) else None
  }

  # read table with batches information (scatter could be avoided if there was a tail() function)
  Array[Array[String]] impute_tsv = read_tsv(impute_tsv_file)
  Int n_batches = length(impute_tsv)-1
  scatter (idx in range(n_batches)) { Array[String] impute_tsv_rows = impute_tsv[(idx+1)] }
  # column name -> column values (one entry per batch)
  Map[String, Array[String]] impute_tbl = as_map(zip(impute_tsv[0], transpose(impute_tsv_rows)))

  # check if path is in impute table (see http://github.com/openwdl/wdl/issues/305)
  Boolean is_path_in_impute_tbl = length(collect_by_key(zip(flatten([keys(impute_tbl),["path"]]),range(length(keys(impute_tbl))+1)))["path"])>1

  # compute data paths for each batch
  scatter (idx in range(n_batches)) {
    String impute_data_paths = select_first([impute_data_path, if is_path_in_impute_tbl then impute_tbl["path"][idx] else ""])
    String impute_data_paths_with_sep = impute_data_paths + (if impute_data_paths == "" || sub(impute_data_paths, "/$", "") != impute_data_paths then "" else "/")
  }

  call lst_header { input: pheno_tsv_file = pheno_tsv_file, docker = basic_bash_docker }

  # compute phenotype regions to test
  scatter (idx in range(length(lst_header.phenos))) {
    # region name is the phenotype name up to the first "_"; trailing p/q arm letters are stripped and Y is mapped to X
    String region_name = sub(lst_header.phenos[idx], "_.*$", "")
    String chr_string = sub(sub(region_name, "[pq]*$", ""), "Y", "X")
    # only keep phenotypes whose chromosome part is non-empty and is either all digits or exactly "X";
    # anything else (e.g. "p_foo", which leaves an empty chromosome, or "1X") is skipped like any other
    # non-region column instead of failing the String -> Int coercion of chr_idx below
    if (chr_string != "" && sub(chr_string, "^([0-9]+|X)$", "") == "") {
      Int pheno_idx = idx
      String pheno_chr = chr_string
      String pheno_name = lst_header.phenos[idx]
      String vcf_file_suffix = pheno_name + "." + ext_string + ".bcf" # http://github.com/broadinstitute/cromwell/issues/5549
      String vcf_idx_suffix = pheno_name + "." + ext_string + ".bcf.csi" # http://github.com/broadinstitute/cromwell/issues/5549
      # "X" becomes ref.n_x_chr so that the chromosome can index the len/pcen/qcen arrays
      Int chr_idx = sub(chr_string, "^X$", ref.n_x_chr)
      String arm = sub(region_name, "^[0-9XY]*", "")
      # p arm: 1-pcen, q arm: qcen-len, no arm: whole chromosome
      String pheno_regions = ref.chr_prefix + chr_string + if arm == "p" then ":1-" + ref.pcen[(chr_idx - 1)] else if arm == "q" then ":" + ref.qcen[(chr_idx - 1)] + "-" + ref.len[(chr_idx - 1)] else ""
    }
  }

  # generate list of all chromosomes to be processed
  Map[String, Array[String]] chr2pheno_idx = collect_by_key(zip(select_all(pheno_chr), select_all(pheno_idx)))
  Map[String, Array[String]] chr2pheno_names = collect_by_key(zip(select_all(pheno_chr), select_all(pheno_name)))
  Map[String, Array[String]] chr2regions = collect_by_key(zip(select_all(pheno_chr), select_all(pheno_regions)))
  Map[String, Array[String]] chr2vcf_file_suffix = collect_by_key(zip(select_all(pheno_chr), select_all(vcf_file_suffix))) # http://github.com/broadinstitute/cromwell/issues/5549
  Map[String, Array[String]] chr2vcf_idx_suffix = collect_by_key(zip(select_all(pheno_chr), select_all(vcf_idx_suffix))) # http://github.com/broadinstitute/cromwell/issues/5549

  # one vcf_summary call per (batch index, chromosome) pair; p.left is the batch index, p.right the chromosome
  scatter (p in cross(range(n_batches), keys(chr2pheno_names))) {
    # phenotype indexes handled by this call, in the same order as the files it outputs
    Array[Int] cross_idx = chr2pheno_idx[p.right]
    call vcf_summary {
      input:
        vcf_file = impute_data_paths_with_sep[p.left] + impute_tbl[("chr" + p.right + "_imp_vcf")][p.left],
        vcf_idx = impute_data_paths_with_sep[p.left] + impute_tbl[("chr" + p.right + "_imp_vcf_index")][p.left],
        pheno_names = chr2pheno_names[p.right],
        regions = chr2regions[p.right],
        vcf_file_suffix = chr2vcf_file_suffix[p.right], # http://github.com/broadinstitute/cromwell/issues/5549
        vcf_idx_suffix = chr2vcf_idx_suffix[p.right], # http://github.com/broadinstitute/cromwell/issues/5549
        keep_samples_file = keep_samples_file,
        remove_samples_file = remove_samples_file,
        pheno_tsv_file = pheno_tsv_file,
        fisher_exact = fisher_exact,
        filebase = sample_set_id + "." + p.left,
        as_id = as_id,
        ext_string = ext_string,
        drop_genotypes = drop_genotypes,
        docker = docker_repository_with_sep + bcftools_docker
    }
  }

  # one vcf_merge call (and optionally one plot) per kept phenotype
  scatter (idx in select_all(pheno_idx)) {
    # phenotype index -> per-batch summary files for that phenotype
    Map[Int, Array[File]] idx2vcf_files = collect_by_key(zip(flatten(cross_idx), flatten(vcf_summary.as_vcf_files)))
    Map[Int, Array[File]] idx2vcf_idxs = collect_by_key(zip(flatten(cross_idx), flatten(vcf_summary.as_vcf_idxs)))
    call vcf_merge {
      input:
        vcf_files = idx2vcf_files[idx],
        vcf_idxs = idx2vcf_idxs[idx],
        ref_fasta = if defined(ref.gff3_file) then ref.fasta else None,
        fasta_fai = ref.fasta_fai,
        gff3_file = ref.gff3_file,
        rsid_vcf_file = ref.rsid_vcf_file,
        rsid_vcf_idx = ref.rsid_vcf_idx,
        region = select_first([pheno_regions[idx]]),
        fisher_exact = fisher_exact,
        as_id = as_id,
        filebase = sample_set_id + "." + ext_string + "_" + lst_header.phenos[idx] + if defined(pop) then "." + select_first([pop]) else "",
        pheno_name = ext_string + "_" + lst_header.phenos[idx] + if defined(pop) then "." + select_first([pop]) else "",
        phred_score = phred_score,
        docker = docker_repository_with_sep + bcftools_docker
    }

    if (plot) {
      # vcf_merge.n is the record count printed by the task; nothing is plotted when it is zero
      if (vcf_merge.n > 0) {
        call assoc_plot {
          input:
            vcf_file = vcf_merge.gwas_vcf_file,
            vcf_idx = vcf_merge.gwas_vcf_idx,
            region = select_first([pheno_regions[idx]]),
            cyto_file = ref.cyto_file,
            csq = defined(ref.gff3_file),
            filebase = sample_set_id + "." + ext_string + "_" + lst_header.phenos[idx] + if defined(pop) then "." + select_first([pop]) else "",
            docker = docker_repository_with_sep + r_mocha_docker
        }
      }
    }
  }

  output {
    Array[File] gwas_vcf_file = vcf_merge.gwas_vcf_file
    Array[File] gwas_vcf_idx = vcf_merge.gwas_vcf_idx
    Array[File]? png_files = if plot then select_all(assoc_plot.png_file) else None
  }

  meta {
    author: "Giulio Genovese"
    email: "[email protected]"
    description: "See the [MoChA](http://github.com/freeseek/mocha) website for more information"
  }
}
# Task lst_header
#
# Prints the header of a tab-separated phenotype table, one column name per line,
# skipping the first column and replacing every space with space_character.
#
# Inputs:
#   pheno_tsv_file   tab-separated table whose first line holds the column names
#   space_character  replacement for spaces found in column names
# Output:
#   phenos           column names from the second column onwards
task lst_header {
  input {
    File pheno_tsv_file
    String space_character = '_'
    String docker
    Int cpu = 1
    Int disk_size = 10
    Float memory = 3.5
    Int preemptible = 1
    Int maxRetries = 0
  }

  # name of the table once it has been moved into the working directory
  String pheno_tsv_name = basename(pheno_tsv_file)

  command <<<
    set -euo pipefail
    mv "~{pheno_tsv_file}" .
    head -n1 "~{pheno_tsv_name}" | cut -f2- | tr '\t ' '\n~{space_character}'
    rm "~{pheno_tsv_name}"
  >>>

  output {
    Array[String] phenos = read_lines(stdout())
  }

  runtime {
    memory: memory + " GiB"
    disks: "local-disk " + disk_size + " HDD"
    cpu: cpu
    docker: docker
    preemptible: preemptible
    maxRetries: maxRetries
  }
}
# the command requires BCFtools 1.14 due to bug http://github.com/samtools/bcftools/issues/1566
# Task vcf_summary
#
# For one imputed VCF (one batch, one chromosome) and each (phenotype, region) pair:
#   - extracts from pheno_tsv_file the samples whose phenotype value is 0 or 1
#     (the table must have a "sample_id" column), optionally restricted by
#     keep_samples_file / remove_samples_file
#   - runs bcftools +mochatools (--tags MACH) over the region on those samples and strips
#     all annotations except INFO/MACH, FMT/GT and FMT/<as_id>
#   - if fisher_exact is true, adds NASSOC counts with bcftools +contrast (value 0 = control, value 1 = case)
#   - summarizes <as_id> over the cases with bcftools +mochatools --summary and keeps
#     the records with sum(<as_id>) > 0
# One indexed BCF named <filebase>.<pheno_name>.<ext_string>.bcf is written per phenotype.
#
# pheno_names, regions, vcf_file_suffix and vcf_idx_suffix are parallel arrays; the suffix arrays
# must equal <pheno_name>.<ext_string>.bcf(.csi) because the outputs are built from them.
task vcf_summary {
  input {
    File vcf_file
    File vcf_idx
    Array[String]+ pheno_names
    Array[String]+ regions
    Array[String]+ vcf_file_suffix # suffix array passed due to bug http://github.com/broadinstitute/cromwell/issues/5549
    Array[String]+ vcf_idx_suffix # suffix array passed due to bug http://github.com/broadinstitute/cromwell/issues/5549
    File? keep_samples_file
    File? remove_samples_file
    File pheno_tsv_file
    Boolean fisher_exact
    String filebase
    String as_id
    String ext_string
    Boolean drop_genotypes = true
    String docker
    Int cpu = 1
    Int? disk_size_override
    Float memory = 3.5
    Int preemptible = 1
    Int maxRetries = 0
  }

  # default disk: 10 GiB plus twice the size of the input VCF
  Float vcf_size = size(vcf_file, "GiB")
  Int disk_size = select_first([disk_size_override, ceil(10.0 + 2.0 * vcf_size)])

  command <<<
    set -euo pipefail
    echo "~{sep("\n", select_all([vcf_file, vcf_idx, keep_samples_file, remove_samples_file, pheno_tsv_file]))}" | \
      tr '\n' '\0' | xargs -0 mv -t .
    pheno_names=~{write_lines(pheno_names)}
    regions=~{write_lines(regions)}
    paste "$pheno_names" "$regions" | while IFS=$'\t' read -r pheno_name region; do
      awk -F"\t" -v pheno_name="$pheno_name" 'NR==1 {for (i=1; i<=NF; i++) f[$i] = i}
        NR>1 && ($(f[pheno_name])==0 || $(f[pheno_name])==1) {print $(f["sample_id"])"\t"$(f[pheno_name])}' \
        "~{basename(pheno_tsv_file)}" > "~{filebase}.pheno.tsv"
      cut -f1 "~{filebase}.pheno.tsv"~{if defined(keep_samples_file) then " | \\\n" +
        "awk -F\"\\t\" 'NR==FNR {x[$1]++} NR>FNR && $1 in x' \"" + basename(select_first([keep_samples_file])) + "\" -"
        else ""}~{if defined(remove_samples_file) then " | \\\n" +
        "awk -F\"\\t\" 'NR==FNR {x[$1]++} NR>FNR && !($1 in x)' \"" + basename(select_first([remove_samples_file])) + "\" -"
        else ""} > "~{filebase}.samples.lines"
      ~{if fisher_exact then "awk -F\"\\t\" '$2==0 {print $1}' \"" + filebase + ".pheno.tsv\" > \"" + filebase + ".controls.lines\"" else ""}
      awk -F"\t" '$2==1 {print $1}' "~{filebase}.pheno.tsv" > "~{filebase}.cases.lines"
      bcftools +mochatools \
        --no-version \
        --output-type u \
        --regions "$region" \
        "~{basename(vcf_file)}" \
        -- --tags MACH \
        --samples-file "~{filebase}.samples.lines" \
        --force-samples | \
      bcftools annotate \
        --no-version \
        --output-type u \
        --remove ID,QUAL,FILTER,^INFO/MACH,^FMT/GT,FMT/~{as_id}~{if fisher_exact then " | \\\n" +
        "bcftools +contrast \\\n" +
        " --output-type u \\\n" +
        " --annots NASSOC \\\n" +
        " --control-samples \"" + filebase + ".controls.lines\" \\\n" +
        " --case-samples \"" + filebase + ".cases.lines\" \\\n" +
        " --force-samples"
        else ""} | \
      bcftools +mochatools \
        --no-version \
        --output-type u \
        -- --summary ~{as_id} \
        --samples-file "~{filebase}.cases.lines" \
        --force-samples \
        ~{if drop_genotypes then "--drop-genotypes" else ""} | \
      bcftools view --no-version --output "~{filebase}.$pheno_name.~{ext_string}.bcf" --output-type b --include 'sum(~{as_id})>0' --write-index
      rm "~{filebase}.pheno.tsv" "~{filebase}.samples.lines" ~{if fisher_exact then "\"" + filebase + ".controls.lines\" " else ""}"~{filebase}.cases.lines"
    done
    echo "~{sep("\n", select_all([vcf_file, vcf_idx, keep_samples_file, remove_samples_file, pheno_tsv_file]))}" | \
      sed 's/^.*\///' | tr '\n' '\0' | xargs -0 rm
  >>>

  output {
    Array[File] as_vcf_files = prefix(filebase + ".", vcf_file_suffix)
    Array[File] as_vcf_idxs = prefix(filebase + ".", vcf_idx_suffix)
  }

  runtime {
    memory: memory + " GiB"
    disks: "local-disk " + disk_size + " HDD"
    cpu: cpu
    docker: docker
    preemptible: preemptible
    maxRetries: maxRetries
  }
}
# bcftools annotate needs --regions due to bug http://github.com/samtools/bcftools/issues/1199
# see http://github.com/MRCIEU/gwas-vcf-specification for VCF output
# see Marchini, J., Howie, B. Genotype imputation for genome-wide association studies. Nat Rev Genet 11, 499–511 (2010). http://doi.org/10.1038/nrg2796
# Task vcf_merge
#
# Merges the per-batch summary BCFs of one phenotype and turns them into a GWAS-VCF:
#   - bcftools merge sums INFO/MACH, INFO/<as_id> and, if fisher_exact, INFO/NASSOC across batches
#   - bcftools +mochatools --test is run on NASSOC (only if fisher_exact) and on <as_id>
#   - if gff3_file is defined, consequences are added with bcftools csq (requires ref_fasta)
#   - if rsid_vcf_file is defined, the ID column is annotated from it over the given region
#   - a whitespace-separated summary table (header CHROM GENPOS ALLELE0 ALLELE1 N INFO N_CASES
#     BETA SE LOG10P A1FREQ AC_ALLELE1) is built from the MACH and <as_id> fields, converted with
#     bcftools +munge (-c REGENIE) and merged with the annotated BCF into <filebase>.gwas.bcf
#
# Outputs:
#   n              number of records of <filebase>.gwas.bcf with sum(<as_id>) > 1 (read from stdout)
#   gwas_vcf_file  <filebase>.gwas.bcf
#   gwas_vcf_idx   <filebase>.gwas.bcf.csi
#
# The summary table reads the <as_id> tag and pbinom_<as_id> rather than a hard-coded "AS",
# in agreement with the --info-rules, --test and sum(...) expressions of this task.
# NOTE(review): the table always reads pbinom_<as_id> and divides it by 10 to fill LOG10P, which
# presumably only holds when phred_score is true -- confirm how +mochatools names the tag otherwise.
task vcf_merge {
  input {
    Array[File]+ vcf_files
    Array[File]+ vcf_idxs
    File? ref_fasta
    File fasta_fai
    File? gff3_file
    File? rsid_vcf_file
    File? rsid_vcf_idx
    String region
    Boolean fisher_exact
    String as_id
    String filebase
    String pheno_name
    Boolean phred_score = true
    String docker
    Int cpu = 1
    Int? disk_size_override
    Float memory = 3.5
    Int preemptible = 1
    Int maxRetries = 0
  }

  # default disk: 10 GiB plus twice the input BCFs plus the reference and rsID resources
  Float vcf_size = size(vcf_files, "GiB")
  Float ref_size = size(ref_fasta, "GiB")
  Float dbsnp_size = size(rsid_vcf_file, "GiB")
  Int disk_size = select_first([disk_size_override, ceil(10.0 + 2.0 * vcf_size + ref_size + dbsnp_size)])

  command <<<
    set -euo pipefail
    vcf_files=~{write_lines(vcf_files)}
    vcf_idxs=~{write_lines(vcf_idxs)}
    echo "~{sep("\n", select_all([ref_fasta, fasta_fai, gff3_file, rsid_vcf_file, rsid_vcf_idx]))}" | \
      cat - "$vcf_files" "$vcf_idxs" | tr '\n' '\0' | xargs -0 mv -t .
    sed -i 's/^.*\///' "$vcf_files" "$vcf_idxs"
    bcftools merge \
      --no-version \
      --output-type u \
      --info-rules MACH:sum,~{if fisher_exact then "NASSOC:sum," else ""}~{as_id}:sum \
      --file-list "$vcf_files" \
      --merge none | \
    ~{if fisher_exact then
      "bcftools +mochatools \\\n" +
      " --no-version \\\n" +
      " --output-type u \\\n" +
      " -- --test NASSOC \\\n" +
      (if phred_score then " --phred" else "") + " |"
      else ""} \
    bcftools +mochatools \
      --no-version \
      --output-type ~{if defined(gff3_file) then "u" else "b"} \
      -- --test ~{as_id} \
      ~{if phred_score then "--phred" else ""} \
      ~{if defined(gff3_file) then " | \\\n" +
      "bcftools csq \\\n" +
      " --no-version \\\n" +
      " --output-type b \\\n" +
      " --fasta-ref \"" + basename(select_first([ref_fasta])) + "\" \\\n" +
      " --gff-annot \"" + basename(select_first([gff3_file])) + "\" \\\n" +
      " --trim-protein-seq 1 \\\n" +
      " --custom-tag CSQ \\\n" +
      " --local-csq \\\n" +
      " --ncsq 64 \\\n" +
      " --samples -" else ""} \
      --output "~{filebase + if defined(rsid_vcf_file) then ".tmp" else ""}.bcf" \
      --write-index
    ~{if defined(rsid_vcf_file) then
      "bcftools annotate \\\n" +
      " --no-version \\\n" +
      " --annotations \"" + basename(select_first([rsid_vcf_file])) + "\" \\\n" +
      " --columns ID \\\n" +
      " --output \"" + filebase + ".bcf\" \\\n" +
      " --output-type b \\\n" +
      " --regions \"" + region + "\" \\\n" +
      " \"" + filebase + ".tmp.bcf\" \\\n" +
      " --write-index\n" +
      " rm \"" + filebase + ".tmp.bcf\" \"" + filebase + ".tmp.bcf.csi\""
      else ""}
    (echo "CHROM GENPOS ALLELE0 ALLELE1 N INFO N_CASES BETA SE LOG10P A1FREQ AC_ALLELE1";
    bcftools query --format "%CHROM\t%POS\t%REF\t%ALT\t%MACH{0}\t%MACH{1}\t%MACH{2}\t%~{as_id}{0}\t%~{as_id}{1}\t%pbinom_~{as_id}\n" "~{filebase}.bcf" | \
      awk -F"\t" '{if ($6*($5-$6/2)==0) si=1; else si=($5*$7/$6-$6)/($5-$6/2);
      nc=$8+$9; es=log(($9+.5)/($8+.5)); se=sqrt(1/($8+.5)+1/($9+.5)); lp=$10/10; af=$6/$5/2; ac=$6;
      print $1,$2,$3,$4,$5,si,nc,es,se,lp,af,ac}') | \
      bcftools +munge --no-version -c REGENIE --fai "~{basename(fasta_fai)}" -s "~{pheno_name}" | \
      bcftools merge --no-version --merge none --no-index --output "~{filebase}.gwas.bcf" --output-type b "~{filebase}.bcf" --write-index -
    bcftools query -f "\n" -i 'sum(~{as_id})>1' "~{filebase}.gwas.bcf" | wc -l
    rm "~{filebase}.bcf" "~{filebase}.bcf.csi"
    echo "~{sep("\n", select_all([ref_fasta, fasta_fai, gff3_file, rsid_vcf_file, rsid_vcf_idx]))}" | \
      cat - "$vcf_files" "$vcf_idxs" | sed 's/^.*\///' | tr '\n' '\0' | xargs -0 rm
  >>>

  output {
    Int n = read_int(stdout())
    File gwas_vcf_file = filebase + ".gwas.bcf"
    File gwas_vcf_idx = filebase + ".gwas.bcf.csi"
  }

  runtime {
    memory: memory + " GiB"
    disks: "local-disk " + disk_size + " HDD"
    cpu: cpu
    docker: docker
    preemptible: preemptible
    maxRetries: maxRetries
  }
}
# Task assoc_plot
#
# Runs assoc_plot.R on an indexed VCF/BCF over one region and writes <filebase>.png.
#
# Inputs:
#   vcf_file, vcf_idx  file to plot and its index (moved into the working directory first)
#   region             region passed to --region (quoted, as vcf_merge does for the same value)
#   cyto_file          optional file passed to --cytoband
#   csq                when true, --csq is passed to the script
# Output:
#   png_file           <filebase>.png
task assoc_plot {
  input {
    File vcf_file
    File vcf_idx
    String region
    File? cyto_file
    Boolean csq = false
    String filebase
    String docker
    Int cpu = 1
    Int? disk_size_override
    Float memory = 3.5
    Int preemptible = 1
    Int maxRetries = 0
  }

  # default disk: 10 GiB plus the size of the input file
  Float vcf_size = size(vcf_file, "GiB")
  Int disk_size = select_first([disk_size_override, ceil(10.0 + vcf_size)])

  command <<<
    set -euo pipefail
    mv "~{vcf_file}" .
    mv "~{vcf_idx}" .
    ~{if defined(cyto_file) then "mv \"" + select_first([cyto_file]) + "\" ." else ""}
    assoc_plot.R \
      ~{if defined(cyto_file) then "--cytoband \"" + basename(select_first([cyto_file])) + "\"" else ""} \
      --vcf "~{basename(vcf_file)}" \
      --as \
      ~{if csq then "--csq" else ""} \
      --region "~{region}" \
      --png "~{filebase}.png"
    rm "~{basename(vcf_file)}"
    rm "~{basename(vcf_idx)}"
    ~{if defined(cyto_file) then "rm \"" + basename(select_first([cyto_file])) + "\"" else ""}
  >>>

  output {
    File png_file = filebase + '.png'
  }

  runtime {
    docker: docker
    cpu: cpu
    disks: "local-disk " + disk_size + " HDD"
    memory: memory + " GiB"
    preemptible: preemptible
    maxRetries: maxRetries
  }
}