diff --git a/bin/chipseeker_peakplot.R b/bin/chipseeker_peakplot.R index 387727e4..556f0b4b 100755 --- a/bin/chipseeker_peakplot.R +++ b/bin/chipseeker_peakplot.R @@ -22,39 +22,42 @@ txdb <- args$txdb %>% rlang::sym() %>% eval() -np <- read.table(args$peak, sep = "\t") -peak_colnames <- c( - "chrom", - "chromStart", - "chromEnd", - "name", - "score", - "strand", - "signalValue", - "pValue", - "qValue" -) -num_columns <- ncol(np) -if (num_columns == 9) { - colnames(np) <- peak_colnames - np <- np %>% mutate(peak = NA) -} else if (num_columns == 10) { - colnames(np) <- c(peak_colnames, "peak") -} else { - stop(paste("Expected 9 or 10 columns in peak file, but", num_columns, "given")) +read_peaks <- function(peak_file) { + peak_colnames <- c( + "chrom", + "start", + "end", + "peakID", + "score", + "strand", + "signal", + "pvalue", + "qvalue", + "peak" + ) + peaks <- read.table(peak_file, sep = "\t") + if (!dplyr::between(ncol(peaks), 9, 10)) { + stop("Unexpected number of columns in peak file") + } + colnames(peaks) <- peak_colnames[seq_len(ncol(peaks))] + return(peaks) } +np <- read_peaks(args$peak) + # plots for individual peak file peaks <- GenomicRanges::GRanges( seqnames = np$chrom, - ranges = IRanges(np$chromStart, np$chromEnd), - qValue = np$qValue + ranges = IRanges(np$start, np$end), + pvalue = np$pvalue, + qvalue = np$qvalue ) +print(peaks) plots <- list( - covplot = covplot(peaks, weightCol = "qValue"), + covplot = covplot(peaks, weightCol = "qvalue"), plotPeakProf2 = plotPeakProf2( peak = peaks, upstream = rel(0.2), downstream = rel(0.2), conf = 0.95, by = "gene", type = "body", nbin = 800, - TxDb = txdb, weightCol = "qValue", ignore_strand = F + TxDb = txdb, weightCol = "qvalue", ignore_strand = TRUE ) ) diff --git a/main.nf b/main.nf index b5555c91..eb236101 100644 --- a/main.nf +++ b/main.nf @@ -21,12 +21,13 @@ log.info """\ include { INPUT_CHECK } from './subworkflows/local/input_check.nf' include { PREPARE_GENOME } from './subworkflows/local/prepare_genome.nf' -include { FILTER_BLACKLIST } from './subworkflows/CCBR/filter_blacklist' +include { FILTER_BLACKLIST } from './subworkflows/CCBR/filter_blacklist/' include { ALIGN_GENOME } from "./subworkflows/local/align.nf" include { DEDUPLICATE } from "./subworkflows/local/deduplicate.nf" include { QC } from './subworkflows/local/qc.nf' include { CALL_PEAKS } from './subworkflows/local/peaks.nf' - +include { CONSENSUS_PEAKS } from './subworkflows/CCBR/consensus_peaks/' +include { ANNOTATE } from './subworkflows/local/annotate.nf' // MODULES include { CUTADAPT } from "./modules/CCBR/cutadapt" @@ -92,13 +93,23 @@ workflow CHIPSEQ { deduped_tagalign, deduped_bam, frag_lengths, - effective_genome_size, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.meme_motifs, - PREPARE_GENOME.out.bioc_txdb, - PREPARE_GENOME.out.bioc_annot + effective_genome_size ) - ch_multiqc = ch_multiqc.mix(CALL_PEAKS.out.plots) + + // consensus peak calling on replicates + ch_peaks_grouped = CALL_PEAKS.out.peaks + .map{ meta, bed, tool -> + [ [ group: "${meta.sample_basename}.${tool}" ], bed ] + } + CONSENSUS_PEAKS( ch_peaks_grouped, params.run.normalize_peaks ) + + ANNOTATE(CONSENSUS_PEAKS.out.peaks, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.meme_motifs, + PREPARE_GENOME.out.bioc_txdb, + PREPARE_GENOME.out.bioc_annot) + ch_multiqc = ch_multiqc.mix(CALL_PEAKS.out.plots, ANNOTATE.out.plots) + } MULTIQC( diff --git a/modules/local/chipseeker/peakplot/main.nf b/modules/local/chipseeker/peakplot/main.nf index 75fd6d16..9c3eaa4e 100644 --- a/modules/local/chipseeker/peakplot/main.nf +++ b/modules/local/chipseeker/peakplot/main.nf @@ -1,12 +1,12 @@ process CHIPSEEKER_PEAKPLOT { - tag "${meta.id}.${group}" + tag "${meta.id}" label 'peaks' label 'process_medium' container 'nciccbr/ccbr_chipseeker:1.1.2' input: - tuple val(meta), path(bed), val(group) + tuple val(meta), path(bed) val(txdb) val(annot_db) @@ -17,7 +17,7 @@ process CHIPSEEKER_PEAKPLOT { """ chipseeker_peakplot.R \\ --peak ${bed} \\ - --outfile-prefix ${meta.id}.${group} \\ + --outfile-prefix ${meta.id} \\ --genome-txdb ${txdb} \\ --genome-annot ${annot_db} """ @@ -26,7 +26,7 @@ process CHIPSEEKER_PEAKPLOT { """ for ftype in annotated.txt summary.txt genelist.txt annotation.Rds .png do - touch ${meta.id}.${group}.\${ftype} + touch ${meta.id}.\${ftype} done """ } diff --git a/modules/local/consensus_peaks/main.nf b/modules/local/consensus_peaks/main.nf deleted file mode 100644 index a1f1a1f1..00000000 --- a/modules/local/consensus_peaks/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process CONSENSUS_PEAKS { - tag "${meta.id}.${meta.group}" - label 'peaks' - label 'process_single' - - container "nciccbr/ccbr_ucsc:v1" - - input: - tuple val(meta), val(peaks) - output: - tuple val(meta), path("*.consensus_peaks.bed"), emit: peaks - - script: - if (peaks.size() > 1) { - """ - get_consensus_peaks.py \\ - --peakfiles ${peaks.join(' ')} --outbed ${meta.id}.${meta.group}.consensus_peaks.bed - """ - } - // just copy the input if there's only one peak file - else { - """ - cp ${peaks.join(' ')} ${meta.id}.${meta.group}.consensus_peaks.bed - """ - } - - stub: - """ - touch ${meta.id}.${meta.group}.consensus_peaks.bed - """ -} diff --git a/modules/local/homer/main.nf b/modules/local/homer/main.nf index 79fbe36d..7e279fd4 100644 --- a/modules/local/homer/main.nf +++ b/modules/local/homer/main.nf @@ -1,5 +1,5 @@ process HOMER_MOTIFS { - tag "${meta.id}.${meta.group}" + tag "${meta.id}" label 'peaks' label 'process_medium' @@ -11,8 +11,8 @@ process HOMER_MOTIFS { path(motif_db) output: - tuple val(meta), path("${meta.id}.${meta.group}_homer/*"), emit: motifs - tuple val(meta), path("${meta.id}.${meta.group}_homer/background.fa"), path("${meta.id}.${meta.group}_homer/target.fa"), emit: ame + tuple val(meta), path("${meta.id}_homer/*"), emit: motifs + tuple val(meta), path("${meta.id}_homer/background.fa"), path("${meta.id}_homer/target.fa"), emit: ame script: def args = de_novo ? "" : " -nomotif " @@ -20,7 +20,7 @@ process HOMER_MOTIFS { findMotifsGenome.pl \\ ${bed} \\ ${genome_fasta} \\ - ${meta.id}.${meta.group}_homer/ \\ + ${meta.id}_homer/ \\ -mknown ${motif_db} \\ -size given \\ -p ${task.cpus} \\ @@ -32,7 +32,7 @@ process HOMER_MOTIFS { stub: """ - mkdir ${meta.id}.${meta.group}_homer/ - touch ${meta.id}.${meta.group}_homer/background.fa ${meta.id}.${meta.group}_homer/target.fa + mkdir ${meta.id}_homer/ + touch ${meta.id}_homer/background.fa ${meta.id}_homer/target.fa """ } diff --git a/nextflow.config b/nextflow.config index f4c2e12a..40bb8814 100644 --- a/nextflow.config +++ b/nextflow.config @@ -77,6 +77,7 @@ params { sicer = true macs_broad = true macs_narrow = true + normalize_peaks = false chipseeker = true homer = true meme = true diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf new file mode 100644 index 00000000..898ce723 --- /dev/null +++ b/subworkflows/local/annotate.nf @@ -0,0 +1,43 @@ +include { HOMER_MOTIFS } from "../../modules/local/homer" +include { MEME_AME } from "../../modules/local/meme" +include { CHIPSEEKER_ANNOTATE } from "../../modules/local/chipseeker/annotate" +include { CHIPSEEKER_PLOTLIST } from "../../modules/local/chipseeker/plotlist" +include { CHIPSEEKER_PEAKPLOT } from "../../modules/local/chipseeker/peakplot" + +workflow ANNOTATE { + take: + ch_peaks + genome_fasta + meme_motifs + bioc_txdb + bioc_annot + + main: + ch_plots = Channel.empty() + if (params.run.chipseeker && + bioc_txdb && bioc_annot) { + CHIPSEEKER_PEAKPLOT( ch_peaks, bioc_txdb, bioc_annot ) + + CHIPSEEKER_ANNOTATE( ch_peaks, bioc_txdb, bioc_annot ) + CHIPSEEKER_ANNOTATE.out.annot.collect() | CHIPSEEKER_PLOTLIST + ch_plots = ch_plots.mix( + CHIPSEEKER_PLOTLIST.out.plots + ) + + } + if (params.run.homer) { + HOMER_MOTIFS(ch_peaks.combine(genome_fasta), + params.homer.de_novo, + file(params.homer.jaspar_db, checkIfExists: true) + ) + + if (params.run.meme && meme_motifs) { + MEME_AME(HOMER_MOTIFS.out.ame, + meme_motifs + ) + } + } + + emit: + plots = ch_plots +} diff --git a/subworkflows/local/peaks.nf b/subworkflows/local/peaks.nf index a5e272f4..b5d71062 100644 --- a/subworkflows/local/peaks.nf +++ b/subworkflows/local/peaks.nf @@ -16,12 +16,6 @@ include { CALC_GENOME_FRAC CONCAT_PEAK_META PLOT_PEAK_WIDTHS } from "../../modules/local/peaks.nf" include { BAM_TO_BED } from "../../modules/local/bedtools.nf" -include { CONSENSUS_PEAKS } from "../../modules/local/consensus_peaks" -include { HOMER_MOTIFS } from "../../modules/local/homer" -include { MEME_AME } from "../../modules/local/meme" -include { CHIPSEEKER_ANNOTATE } from "../../modules/local/chipseeker/annotate" -include { CHIPSEEKER_PLOTLIST } from "../../modules/local/chipseeker/plotlist" -include { CHIPSEEKER_PEAKPLOT } from "../../modules/local/chipseeker/peakplot" workflow CALL_PEAKS { take: @@ -31,10 +25,6 @@ workflow CALL_PEAKS { deduped_bam frag_lengths effective_genome_size - genome_fasta - meme_motifs - bioc_txdb - bioc_annot main: genome_frac = CALC_GENOME_FRAC(chrom_sizes, effective_genome_size) @@ -134,51 +124,7 @@ workflow CALL_PEAKS { .mix(PLOT_JACCARD.out) .mix(PLOT_PEAK_WIDTHS.out) - // consensus peak calling on replicates - ch_peaks - .map{ meta, bed, tool -> - [ "${meta.sample_basename}_${tool}", meta, bed, tool,] - } - .groupTuple(by: 0) // group by the sample_basename and peak-calling tool - .set{ peak_reps } - // assert that sample_basenames & tools match - peak_reps.subscribe { basename_tool, metas, beds, tools -> - assert metas.collect{ it.sample_basename }.toSet().size() == 1 - assert tools.toSet().size() == 1 - } - peak_reps - .map { basename_tool, metas, beds, tools -> - [ [id: metas[0].sample_basename, group: tools[0]], beds ] - } - .set{ - peaks_grouped - } - peaks_grouped | CONSENSUS_PEAKS - ch_consensus_peaks = CONSENSUS_PEAKS.out.peaks - - if (params.run.chipseeker && bioc_txdb && bioc_annot) { - // TODO: change consensus peak method to keep p-value, q-value, etc for use in chipseeker peakplots - CHIPSEEKER_PEAKPLOT( ch_peaks, bioc_txdb, bioc_annot ) - CHIPSEEKER_ANNOTATE( ch_consensus_peaks, bioc_txdb, bioc_annot ) - CHIPSEEKER_ANNOTATE.out.annot.collect() | CHIPSEEKER_PLOTLIST - ch_plots = ch_plots.mix( - CHIPSEEKER_PLOTLIST.out.plots - ) - } - if (params.run.homer) { - HOMER_MOTIFS(ch_consensus_peaks.combine(genome_fasta), - params.homer.de_novo, - file(params.homer.jaspar_db, checkIfExists: true) - ) - - if (params.run.meme && meme_motifs) { - MEME_AME(HOMER_MOTIFS.out.ame, - meme_motifs - ) - } - } - emit: - peaks = ch_bam_peaks + peaks = ch_peaks plots = ch_plots }