forked from gatk-workflows/gatk4-exome-analysis-pipeline
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ExomeGermlineSingleSample.wdl
74 lines (65 loc) · 3.46 KB
/
ExomeGermlineSingleSample.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
version 1.0
## Copyright Broad Institute, 2018
##
## This WDL pipeline implements data pre-processing and initial variant calling (GVCF
## generation) according to the GATK Best Practices (June 2016) for germline SNP and
## Indel discovery in human exome sequencing data.
##
## Requirements/expectations :
## - Human exome sequencing data in unmapped BAM (uBAM) format
## - One or more read groups, one per uBAM file, all belonging to a single sample (SM)
## - Input uBAM files must additionally comply with the following requirements:
## - - filenames all have the same suffix (we use ".unmapped.bam")
## - - files must pass validation by ValidateSamFile
## - - reads are provided in query-sorted order
## - - all reads must have an RG tag
## - GVCF output names must end in ".g.vcf.gz"
## - Reference genome must be Hg38 with ALT contigs
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.
import "https://raw.githubusercontent.com/talkowski-lab/gatk4-exome-analysis-pipeline/master/tasks/GermlineVariantDiscovery.wdl" as Calling
import "https://raw.githubusercontent.com/talkowski-lab/gatk4-exome-analysis-pipeline/master/tasks/Qc.wdl" as QC
import "https://raw.githubusercontent.com/talkowski-lab/gatk4-exome-analysis-pipeline/master/tasks/Utilities.wdl" as Utils
import "https://raw.githubusercontent.com/talkowski-lab/gatk4-exome-analysis-pipeline/master/tasks/VariantCalling.wdl" as ToGvcf
import "https://raw.githubusercontent.com/talkowski-lab/gatk4-exome-analysis-pipeline/master/structs/GermlineStructs.wdl"
# WORKFLOW DEFINITION
workflow ExomeGermlineSingleSample {
input {
SampleAndUnmappedBams sample_and_unmapped_bams
PapiSettings papi_settings
GermlineSingleSampleReferences references
}
call ToGvcf.VariantCalling as BamToGvcf {
input:
calling_interval_list = references.calling_interval_list,
evaluation_interval_list = references.evaluation_interval_list,
haplotype_scatter_count = references.haplotype_scatter_count,
break_bands_at_multiples_of = references.break_bands_at_multiples_of,
input_bam = sample_and_unmapped_bams.input_cram,
input_crai = sample_and_unmapped_bams.input_crai,
ref_fasta = references.reference_fasta.ref_fasta,
ref_fasta_index = references.reference_fasta.ref_fasta_index,
ref_dict = references.reference_fasta.ref_dict,
dbsnp_vcf = references.dbsnp_vcf,
dbsnp_vcf_index = references.dbsnp_vcf_index,
base_file_name = sample_and_unmapped_bams.base_file_name,
final_vcf_base_name = sample_and_unmapped_bams.final_gvcf_base_name,
agg_preemptible_tries = papi_settings.agg_preemptible_tries
}
# Outputs that will be retained when execution is complete
output {
File gvcf_summary_metrics = BamToGvcf.vcf_summary_metrics
File gvcf_detail_metrics = BamToGvcf.vcf_detail_metrics
File output_vcf = BamToGvcf.output_vcf
File output_vcf_index = BamToGvcf.output_vcf_index
}
}