diff --git a/main.nf b/main.nf index b63bdfe..2de0c4f 100644 --- a/main.nf +++ b/main.nf @@ -46,7 +46,7 @@ def helpMsg() { --gatk_app Link to gatk executable [default: '$gatk_app'] --java_options Java options for gatk [default:'${java_options}'] --gatk_cluster_options GATK cluster options [default:'${params.gatk_cluster_options}'] - --gatk_HaplotypeCaller_params Additional parameters to pass to GATK HaplotypeCaller [default:'${params.gatk_HaplotypeCaller_params}'] + --gatk_haplotype_caller_params Additional parameters to pass to GATK HaplotypeCaller [default:'${params.gatk_haplotype_caller_params}'] Aligners: --bwamem2_app Link to bwamem2 executable [default: '$bwamem2_app'] @@ -80,7 +80,7 @@ if(params.help){ def parameters_valid = ['help','outdir', 'genome','gtf','reads','reads_file','long_reads','invariant','seq', 'singularity_img','docker_img','container_img', - 'gatk_app','gatk_HaplotypeCaller_params', + 'gatk_app','gatk_haplotype_caller_params', 'star_app','star_index_params','star_index_file','bwamem2_app','samtools_app','bedtools_app','datamash_app','vcftools_app', 'pbmm2_app', 'java_options','window','queueSize','queue-size','account', 'threads', 'gatk_cluster_options'] as Set diff --git a/modules/local/DNAseq.nf b/modules/local/DNAseq.nf index 61697da..44830ec 100644 --- a/modules/local/DNAseq.nf +++ b/modules/local/DNAseq.nf @@ -127,6 +127,52 @@ process MergeBamAlignment { """ } +process MarkDuplicates { + label 'gatk' + publishDir "${params.outdir}/03_PrepGATK" + input: + tuple path(bam), path(bai), path(genome_fasta), path(genome_dict) + output: + path("${bam.simpleName}_marked.bam") + script: + """ + #! /usr/bin/env bash + $gatk_app --java-options "${java_options}" MarkDuplicates \ + --INPUT $bam \ + --OUTPUT ${bam.simpleName}_marked.bam \ + --METRICS_FILE ${bam.simpleName}_marked.metrics \ + --VALIDATION_STRINGENCY SILENT \ + --OPTICAL_DUPLICATE_PIXEL_DISTANCE 2500 \ + --ASSUME_SORT_ORDER "queryname" \ + --CREATE_MD5_FILE true + """ +} + +process SortAndFixTags { + label 'gatk' + publishDir "${params.outdir}/03_PrepGATK" + input: + tuple path(bam), path(genome_fasta), path(genome_dict) + output: + tuple path("${bam.simpleName}_sorted.bam"), path("${bam.simpleName}_sorted.bai") + script: + """ + #! /usr/bin/env bash + $gatk_app --java-options "${java_options}" SortSam \ + --INPUT $bam \ + --OUTPUT /dev/stdout \ + --SORT_ORDER "coordinate" \ + --CREATE_INDEX false \ + --CREATE_MD5_FILE false \ + | $gatk_app --java-options "${java_options}" SetNmMdAndUqTags \ + --INPUT /dev/stdin \ + --OUTPUT ${bam.simpleName}_sorted.bam \ + --CREATE_INDEX true \ + --CREATE_MD5_FILE true \ + --REFERENCE_SEQUENCE $genome_fasta + """ +} + process gatk_HaplotypeCaller { tag "$window" label 'gatk' diff --git a/modules/local/GATK.nf b/modules/local/GATK.nf index 0bb03b8..fdc980b 100644 --- a/modules/local/GATK.nf +++ b/modules/local/GATK.nf @@ -91,6 +91,36 @@ process CreateSequenceDictionary { """ } +process MarkDuplicates { + tag "$i_readname" + label 'gatk' + publishDir "${params.outdir}/03_PrepGATK" + + input: // [readgroup, unmapped reads, mapped reads] + tuple val(i_readname), path(merge_bam), path(merge_bai) + + output: // merged bam and bai files + tuple val("${i_readname}"), path("${i_readname}_markduplicates.bam"), path("${i_readname}_markduplicates.bai") + + script: + """ + ${gatk_app} \ + MarkDuplicates \ + --INPUT ${merge_bam} \ + --OUTPUT ${i_readname}_markduplicates.bam \ + --CREATE_INDEX true \ + --VALIDATION_STRINGENCY SILENT \ + --METRICS_FILE ${i_readname}_markduplicates.metrics + """ + + stub: + """ + #! /usr/bin/env bash + touch ${i_readname}_markduplicates.bam + touch ${i_readname}_markduplicates.bai + """ +} + process samtools_faidx { tag "${genome_fasta.simpleName}" label 'samtools' diff --git a/modules/local/RNAseq.nf b/modules/local/RNAseq.nf index ee74c75..8b6ff53 100644 --- a/modules/local/RNAseq.nf +++ b/modules/local/RNAseq.nf @@ -131,37 +131,6 @@ process MergeBamAlignment { """ } - -process MarkDuplicates { - tag "$i_readname" - label 'gatk' - publishDir "${params.outdir}/03_PrepGATK" - - input: // [readgroup, unmapped reads, mapped reads] - tuple val(i_readname), path(merge_bam), path(merge_bai) - - output: // merged bam and bai files - tuple val("${i_readname}"), path("${i_readname}_markduplicates.bam"), path("${i_readname}_markduplicates.bai") - - script: - """ - ${gatk_app} \ - MarkDuplicates \ - --INPUT ${merge_bam} \ - --OUTPUT ${i_readname}_markduplicates.bam \ - --CREATE_INDEX true \ - --VALIDATION_STRINGENCY SILENT \ - --METRICS_FILE ${i_readname}_markduplicates.metrics - """ - - stub: - """ - #! /usr/bin/env bash - touch ${i_readname}_markduplicates.bam - touch ${i_readname}_markduplicates.bai - """ -} - process SplitNCigarReads { tag "$i_readname" label 'gatk' diff --git a/nextflow.config b/nextflow.config index b825db6..e9a216f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { docker_img = 'j23414/gatk4' container_img = 'docker://ghcr.io/aseetharam/gatk:master' gatk_app = 'gatk' - gatk_HaplotypeCaller_params = '' + gatk_haplotype_caller_params = "" star_app = 'STAR' bwamem2_app = 'bwa-mem2' samtools_app = 'samtools' @@ -58,7 +58,7 @@ env { star_index_file = params.star_index_file bedtools_app = "$params.bedtools_app" gatk_app = "$params.gatk_app" - gatk_HaplotypeCaller_params = "$params.gatk_HaplotypeCaller_params" + gatk_HaplotypeCaller_params = params.gatk_haplotype_caller_params ? "$params.gatk_haplotype_caller_params" : " " datamash_app = "$params.datamash_app" vcftools_app = "$params.vcftools_app" pbmm2_app = "$params.pbmm2_app" diff --git a/subworkflows/local/dna_variant_calling/main.nf b/subworkflows/local/dna_variant_calling/main.nf index d7799f6..54697ea 100644 --- a/subworkflows/local/dna_variant_calling/main.nf +++ b/subworkflows/local/dna_variant_calling/main.nf @@ -20,6 +20,8 @@ include { SamToFastq as SamToFastq_DNA bwamem2_index; bwamem2_mem; MergeBamAlignment as MergeBamAlignment_DNA; + MarkDuplicates as MarkDuplicates_DNA; + SortAndFixTags as SortAndFixTags_DNA; gatk_HaplotypeCaller as gatk_HaplotypeCaller_DNA; gatk_HaplotypeCaller_invariant; } from '../../../modules/local/DNAseq.nf' @@ -57,6 +59,12 @@ workflow DNA_VARIANT_CALLING { | combine(genome_ch) | combine(CreateSequenceDictionary.out) | MergeBamAlignment_DNA + | combine(genome_ch) + | combine(CreateSequenceDictionary.out) + | MarkDuplicates_DNA + | combine(genome_ch) + | combine(CreateSequenceDictionary.out) + | SortAndFixTags_DNA if(params.invariant) { allbambai_ch = MergeBamAlignment.out // do these need to be merged by read? diff --git a/subworkflows/local/long_read_variant_calling/main.nf b/subworkflows/local/long_read_variant_calling/main.nf index 6aa6d95..773b96d 100644 --- a/subworkflows/local/long_read_variant_calling/main.nf +++ b/subworkflows/local/long_read_variant_calling/main.nf @@ -3,6 +3,7 @@ nextflow.enable.dsl=2 include { CreateSequenceDictionary; + MarkDuplicates; samtools_faidx; keep_only_pass; bedtools_makewindows; } from '../../../modules/local/GATK.nf' @@ -15,10 +16,6 @@ include { pbmm2_index; calc_DPvalue as calc_DPvalue_LongRead; VariantFiltration as VariantFiltration_LongRead } from '../../../modules/local/LongReadseq.nf' -include { MarkDuplicates as MarkDuplicates_LongRead; } from '../../../modules/local/RNAseq.nf' - - - workflow LONGREAD_VARIANT_CALLING { take: genome_ch @@ -44,7 +41,7 @@ workflow LONGREAD_VARIANT_CALLING { | pbmm2_index | combine(cleanreads_ch) | pbmm2_align - | MarkDuplicates_LongRead + | MarkDuplicates | combine(windows_ch) | view | combine(genome_ch) diff --git a/subworkflows/local/rna_variant_calling/main.nf b/subworkflows/local/rna_variant_calling/main.nf index 64189c7..63bb3c9 100644 --- a/subworkflows/local/rna_variant_calling/main.nf +++ b/subworkflows/local/rna_variant_calling/main.nf @@ -5,6 +5,7 @@ nextflow.enable.dsl=2 include { FastqToSam; MarkIlluminaAdapters; CreateSequenceDictionary; + MarkDuplicates; samtools_faidx; bedtools_makewindows; CombineGVCFs; @@ -20,7 +21,6 @@ include { SamToFastq as SamToFastq_RNA; STAR_index; STAR_align; MergeBamAlignment as MergeBamAlignment_RNA; - MarkDuplicates; SplitNCigarReads; gatk_HaplotypeCaller as gatk_HaplotypeCaller_RNA; } from '../../../modules/local/RNAseq.nf'