Skip to content

Commit

Permalink
Add markduplicates to gatk dna pipeline #15
Browse files Browse the repository at this point in the history
  • Loading branch information
j23414 authored Feb 5, 2024
2 parents 5a26fc9 + 5d9a73d commit 73bfc89
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 41 deletions.
4 changes: 2 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def helpMsg() {
--gatk_app Link to gatk executable [default: '$gatk_app']
--java_options Java options for gatk [default:'${java_options}']
--gatk_cluster_options GATK cluster options [default:'${params.gatk_cluster_options}']
--gatk_HaplotypeCaller_params Additional parameters to pass to GATK HaplotypeCaller [default:'${params.gatk_HaplotypeCaller_params}']
--gatk_haplotype_caller_params Additional parameters to pass to GATK HaplotypeCaller [default:'${params.gatk_haplotype_caller_params}']
Aligners:
--bwamem2_app Link to bwamem2 executable [default: '$bwamem2_app']
Expand Down Expand Up @@ -80,7 +80,7 @@ if(params.help){
def parameters_valid = ['help','outdir',
'genome','gtf','reads','reads_file','long_reads','invariant','seq',
'singularity_img','docker_img','container_img',
'gatk_app','gatk_HaplotypeCaller_params',
'gatk_app','gatk_haplotype_caller_params',
'star_app','star_index_params','star_index_file','bwamem2_app','samtools_app','bedtools_app','datamash_app','vcftools_app',
'pbmm2_app',
'java_options','window','queueSize','queue-size','account', 'threads', 'gatk_cluster_options'] as Set
Expand Down
46 changes: 46 additions & 0 deletions modules/local/DNAseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,52 @@ process MergeBamAlignment {
"""
}

process MarkDuplicates {
label 'gatk'
publishDir "${params.outdir}/03_PrepGATK"
input:
tuple path(bam), path(bai), path(genome_fasta), path(genome_dict)
output:
path("${bam.simpleName}_marked.bam")
script:
"""
#! /usr/bin/env bash
$gatk_app --java-options "${java_options}" MarkDuplicates \
--INPUT $bam \
--OUTPUT ${bam.simpleName}_marked.bam \
--METRICS_FILE ${bam.simpleName}_marked.metrics \
--VALIDATION_STRINGENCY SILENT \
--OPTICAL_DUPLICATE_PIXEL_DISTANCE 2500 \
--ASSUME_SORT_ORDER "queryname" \
--CREATE_MD5_FILE true
"""
}

process SortAndFixTags {
label 'gatk'
publishDir "${params.outdir}/03_PrepGATK"
input:
tuple path(bam), path(genome_fasta), path(genome_dict)
output:
tuple path("${bam.simpleName}_sorted.bam"), path("${bam.simpleName}_sorted.bai")
script:
"""
#! /usr/bin/env bash
$gatk_app --java-options "${java_options}" SortSam \
--INPUT $bam \
--OUTPUT /dev/stdout \
--SORT_ORDER "coordinate" \
--CREATE_INDEX false \
--CREATE_MD5_FILE false \
| $gatk_app --java-options "${java_options}" SetNmMdAndUqTags \
--INPUT /dev/stdin \
--OUTPUT ${bam.simpleName}_sorted.bam \
--CREATE_INDEX true \
--CREATE_MD5_FILE true \
--REFERENCE_SEQUENCE $genome_fasta
"""
}

process gatk_HaplotypeCaller {
tag "$window"
label 'gatk'
Expand Down
30 changes: 30 additions & 0 deletions modules/local/GATK.nf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,36 @@ process CreateSequenceDictionary {
"""
}

process MarkDuplicates {
tag "$i_readname"
label 'gatk'
publishDir "${params.outdir}/03_PrepGATK"

input: // [readgroup, unmapped reads, mapped reads]
tuple val(i_readname), path(merge_bam), path(merge_bai)

output: // merged bam and bai files
tuple val("${i_readname}"), path("${i_readname}_markduplicates.bam"), path("${i_readname}_markduplicates.bai")

script:
"""
${gatk_app} \
MarkDuplicates \
--INPUT ${merge_bam} \
--OUTPUT ${i_readname}_markduplicates.bam \
--CREATE_INDEX true \
--VALIDATION_STRINGENCY SILENT \
--METRICS_FILE ${i_readname}_markduplicates.metrics
"""

stub:
"""
#! /usr/bin/env bash
touch ${i_readname}_markduplicates.bam
touch ${i_readname}_markduplicates.bai
"""
}

process samtools_faidx {
tag "${genome_fasta.simpleName}"
label 'samtools'
Expand Down
31 changes: 0 additions & 31 deletions modules/local/RNAseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -131,37 +131,6 @@ process MergeBamAlignment {
"""
}


process MarkDuplicates {
tag "$i_readname"
label 'gatk'
publishDir "${params.outdir}/03_PrepGATK"

input: // [readgroup, unmapped reads, mapped reads]
tuple val(i_readname), path(merge_bam), path(merge_bai)

output: // merged bam and bai files
tuple val("${i_readname}"), path("${i_readname}_markduplicates.bam"), path("${i_readname}_markduplicates.bai")

script:
"""
${gatk_app} \
MarkDuplicates \
--INPUT ${merge_bam} \
--OUTPUT ${i_readname}_markduplicates.bam \
--CREATE_INDEX true \
--VALIDATION_STRINGENCY SILENT \
--METRICS_FILE ${i_readname}_markduplicates.metrics
"""

stub:
"""
#! /usr/bin/env bash
touch ${i_readname}_markduplicates.bam
touch ${i_readname}_markduplicates.bai
"""
}

process SplitNCigarReads {
tag "$i_readname"
label 'gatk'
Expand Down
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ params {
docker_img = 'j23414/gatk4'
container_img = 'docker://ghcr.io/aseetharam/gatk:master'
gatk_app = 'gatk'
gatk_HaplotypeCaller_params = ''
gatk_haplotype_caller_params = ""
star_app = 'STAR'
bwamem2_app = 'bwa-mem2'
samtools_app = 'samtools'
Expand Down Expand Up @@ -58,7 +58,7 @@ env {
star_index_file = params.star_index_file
bedtools_app = "$params.bedtools_app"
gatk_app = "$params.gatk_app"
gatk_HaplotypeCaller_params = "$params.gatk_HaplotypeCaller_params"
gatk_HaplotypeCaller_params = params.gatk_haplotype_caller_params ? "$params.gatk_haplotype_caller_params" : " "
datamash_app = "$params.datamash_app"
vcftools_app = "$params.vcftools_app"
pbmm2_app = "$params.pbmm2_app"
Expand Down
8 changes: 8 additions & 0 deletions subworkflows/local/dna_variant_calling/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ include { SamToFastq as SamToFastq_DNA
bwamem2_index;
bwamem2_mem;
MergeBamAlignment as MergeBamAlignment_DNA;
MarkDuplicates as MarkDuplicates_DNA;
SortAndFixTags as SortAndFixTags_DNA;
gatk_HaplotypeCaller as gatk_HaplotypeCaller_DNA;
gatk_HaplotypeCaller_invariant; } from '../../../modules/local/DNAseq.nf'

Expand Down Expand Up @@ -57,6 +59,12 @@ workflow DNA_VARIANT_CALLING {
| combine(genome_ch)
| combine(CreateSequenceDictionary.out)
| MergeBamAlignment_DNA
| combine(genome_ch)
| combine(CreateSequenceDictionary.out)
| MarkDuplicates_DNA
| combine(genome_ch)
| combine(CreateSequenceDictionary.out)
| SortAndFixTags_DNA

if(params.invariant) {
allbambai_ch = MergeBamAlignment.out // do these need to be merged by read?
Expand Down
7 changes: 2 additions & 5 deletions subworkflows/local/long_read_variant_calling/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
nextflow.enable.dsl=2

include { CreateSequenceDictionary;
MarkDuplicates;
samtools_faidx;
keep_only_pass;
bedtools_makewindows; } from '../../../modules/local/GATK.nf'
Expand All @@ -15,10 +16,6 @@ include { pbmm2_index;
calc_DPvalue as calc_DPvalue_LongRead;
VariantFiltration as VariantFiltration_LongRead } from '../../../modules/local/LongReadseq.nf'

include { MarkDuplicates as MarkDuplicates_LongRead; } from '../../../modules/local/RNAseq.nf'



workflow LONGREAD_VARIANT_CALLING {
take:
genome_ch
Expand All @@ -44,7 +41,7 @@ workflow LONGREAD_VARIANT_CALLING {
| pbmm2_index
| combine(cleanreads_ch)
| pbmm2_align
| MarkDuplicates_LongRead
| MarkDuplicates
| combine(windows_ch)
| view
| combine(genome_ch)
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/rna_variant_calling/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ nextflow.enable.dsl=2
include { FastqToSam;
MarkIlluminaAdapters;
CreateSequenceDictionary;
MarkDuplicates;
samtools_faidx;
bedtools_makewindows;
CombineGVCFs;
Expand All @@ -20,7 +21,6 @@ include { SamToFastq as SamToFastq_RNA;
STAR_index;
STAR_align;
MergeBamAlignment as MergeBamAlignment_RNA;
MarkDuplicates;
SplitNCigarReads;
gatk_HaplotypeCaller as gatk_HaplotypeCaller_RNA; } from '../../../modules/local/RNAseq.nf'

Expand Down

0 comments on commit 73bfc89

Please sign in to comment.