refactor: use consensus_peaks subwf from nf-modules

CCBR · Dec 5, 2023 · 465b361 · 465b361
1 parent b8450a1
commit 465b361
Show file tree

Hide file tree

Showing 8 changed files with 101 additions and 128 deletions.
diff --git a/bin/chipseeker_peakplot.R b/bin/chipseeker_peakplot.R
@@ -22,39 +22,42 @@ txdb <- args$txdb %>%
   rlang::sym() %>%
   eval()
 
-np <- read.table(args$peak, sep = "\t")
-peak_colnames <- c(
-  "chrom",
-  "chromStart",
-  "chromEnd",
-  "name",
-  "score",
-  "strand",
-  "signalValue",
-  "pValue",
-  "qValue"
-)
-num_columns <- ncol(np)
-if (num_columns == 9) {
-  colnames(np) <- peak_colnames
-  np <- np %>% mutate(peak = NA)
-} else if (num_columns == 10) {
-  colnames(np) <- c(peak_colnames, "peak")
-} else {
-  stop(paste("Expected 9 or 10 columns in peak file, but", num_columns, "given"))
+read_peaks <- function(peak_file) {
+  peak_colnames <- c(
+    "chrom",
+    "start",
+    "end",
+    "peakID",
+    "score",
+    "strand",
+    "signal",
+    "pvalue",
+    "qvalue",
+    "peak"
+  )
+  peaks <- read.table(peak_file, sep = "\t")
+  if (!dplyr::between(ncol(peaks), 9, 10)) {
+    stop("Unexpected number of columns in peak file")
+  }
+  colnames(peaks) <- peak_colnames[seq_len(ncol(peaks))]
+  return(peaks)
 }
+np <- read_peaks(args$peak)
+
 # plots for individual peak file
 peaks <- GenomicRanges::GRanges(
   seqnames = np$chrom,
-  ranges = IRanges(np$chromStart, np$chromEnd),
-  qValue = np$qValue
+  ranges = IRanges(np$start, np$end),
+  pvalue = np$pvalue,
+  qvalue = np$qvalue
 )
+print(peaks)
 plots <- list(
-  covplot = covplot(peaks, weightCol = "qValue"),
+  covplot = covplot(peaks, weightCol = "qvalue"),
   plotPeakProf2 = plotPeakProf2(
     peak = peaks, upstream = rel(0.2), downstream = rel(0.2),
     conf = 0.95, by = "gene", type = "body", nbin = 800,
-    TxDb = txdb, weightCol = "qValue", ignore_strand = F
+    TxDb = txdb, weightCol = "qvalue", ignore_strand = TRUE
   )
 )
 

diff --git a/main.nf b/main.nf
@@ -21,12 +21,13 @@ log.info """\
 
 include { INPUT_CHECK              } from './subworkflows/local/input_check.nf'
 include { PREPARE_GENOME           } from './subworkflows/local/prepare_genome.nf'
-include { FILTER_BLACKLIST         } from './subworkflows/CCBR/filter_blacklist'
+include { FILTER_BLACKLIST         } from './subworkflows/CCBR/filter_blacklist/'
 include { ALIGN_GENOME             } from "./subworkflows/local/align.nf"
 include { DEDUPLICATE              } from "./subworkflows/local/deduplicate.nf"
 include { QC                       } from './subworkflows/local/qc.nf'
 include { CALL_PEAKS               } from './subworkflows/local/peaks.nf'
-
+include { CONSENSUS_PEAKS          } from './subworkflows/CCBR/consensus_peaks/'
+include { ANNOTATE                 } from './subworkflows/local/annotate.nf'
 
 // MODULES
 include { CUTADAPT                 } from "./modules/CCBR/cutadapt"
@@ -92,13 +93,23 @@ workflow CHIPSEQ {
                    deduped_tagalign,
                    deduped_bam,
                    frag_lengths,
-                   effective_genome_size,
-                   PREPARE_GENOME.out.fasta,
-                   PREPARE_GENOME.out.meme_motifs,
-                   PREPARE_GENOME.out.bioc_txdb,
-                   PREPARE_GENOME.out.bioc_annot
+                   effective_genome_size
                    )
-        ch_multiqc = ch_multiqc.mix(CALL_PEAKS.out.plots)
+
+        // consensus peak calling on replicates
+        ch_peaks_grouped = CALL_PEAKS.out.peaks
+            .map{ meta, bed, tool ->
+                [ [ group: "${meta.sample_basename}.${tool}" ], bed ]
+            }
+        CONSENSUS_PEAKS( ch_peaks_grouped, params.run.normalize_peaks )
+
+        ANNOTATE(CONSENSUS_PEAKS.out.peaks,
+                 PREPARE_GENOME.out.fasta,
+                 PREPARE_GENOME.out.meme_motifs,
+                 PREPARE_GENOME.out.bioc_txdb,
+                 PREPARE_GENOME.out.bioc_annot)
+        ch_multiqc = ch_multiqc.mix(CALL_PEAKS.out.plots, ANNOTATE.out.plots)
+
     }
 
     MULTIQC(

diff --git a/modules/local/chipseeker/peakplot/main.nf b/modules/local/chipseeker/peakplot/main.nf
@@ -1,12 +1,12 @@
 process CHIPSEEKER_PEAKPLOT {
-    tag "${meta.id}.${group}"
+    tag "${meta.id}"
     label 'peaks'
     label 'process_medium'
 
     container 'nciccbr/ccbr_chipseeker:1.1.2'
 
     input:
-        tuple val(meta), path(bed), val(group)
+        tuple val(meta), path(bed)
         val(txdb)
         val(annot_db)
 
@@ -17,7 +17,7 @@ process CHIPSEEKER_PEAKPLOT {
     """
     chipseeker_peakplot.R \\
         --peak ${bed} \\
-        --outfile-prefix ${meta.id}.${group} \\
+        --outfile-prefix ${meta.id} \\
         --genome-txdb ${txdb} \\
         --genome-annot ${annot_db}
     """
@@ -26,7 +26,7 @@ process CHIPSEEKER_PEAKPLOT {
     """
     for ftype in annotated.txt summary.txt genelist.txt annotation.Rds .png
     do
-        touch ${meta.id}.${group}.\${ftype}
+        touch ${meta.id}.\${ftype}
     done
     """
 }
diff --git a/modules/local/consensus_peaks/main.nf b/modules/local/consensus_peaks/main.nf
diff --git a/modules/local/homer/main.nf b/modules/local/homer/main.nf
@@ -1,5 +1,5 @@
 process HOMER_MOTIFS {
-    tag "${meta.id}.${meta.group}"
+    tag "${meta.id}"
     label 'peaks'
     label 'process_medium'
 
@@ -11,16 +11,16 @@ process HOMER_MOTIFS {
         path(motif_db)
 
     output:
-        tuple val(meta), path("${meta.id}.${meta.group}_homer/*"), emit: motifs
-        tuple val(meta), path("${meta.id}.${meta.group}_homer/background.fa"), path("${meta.id}.${meta.group}_homer/target.fa"), emit: ame
+        tuple val(meta), path("${meta.id}_homer/*"), emit: motifs
+        tuple val(meta), path("${meta.id}_homer/background.fa"), path("${meta.id}_homer/target.fa"), emit: ame
 
     script:
     def args = de_novo ? "" : " -nomotif "
     """
     findMotifsGenome.pl \\
         ${bed} \\
         ${genome_fasta} \\
-        ${meta.id}.${meta.group}_homer/ \\
+        ${meta.id}_homer/ \\
         -mknown ${motif_db} \\
         -size given \\
         -p ${task.cpus} \\
@@ -32,7 +32,7 @@ process HOMER_MOTIFS {
 
     stub:
     """
-    mkdir ${meta.id}.${meta.group}_homer/
-    touch ${meta.id}.${meta.group}_homer/background.fa ${meta.id}.${meta.group}_homer/target.fa
+    mkdir ${meta.id}_homer/
+    touch ${meta.id}_homer/background.fa ${meta.id}_homer/target.fa
     """
 }
diff --git a/nextflow.config b/nextflow.config
@@ -77,6 +77,7 @@ params {
         sicer = true
         macs_broad = true
         macs_narrow = true
+        normalize_peaks = false
         chipseeker = true
         homer = true
         meme = true

diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf
@@ -0,0 +1,43 @@
+include { HOMER_MOTIFS        } from "../../modules/local/homer"
+include { MEME_AME            } from "../../modules/local/meme"
+include { CHIPSEEKER_ANNOTATE } from "../../modules/local/chipseeker/annotate"
+include { CHIPSEEKER_PLOTLIST } from "../../modules/local/chipseeker/plotlist"
+include { CHIPSEEKER_PEAKPLOT } from "../../modules/local/chipseeker/peakplot"
+
+workflow ANNOTATE {
+    take:
+        ch_peaks
+        genome_fasta
+        meme_motifs
+        bioc_txdb
+        bioc_annot
+
+    main:
+        ch_plots = Channel.empty()
+        if (params.run.chipseeker &&
+         bioc_txdb && bioc_annot) {
+            CHIPSEEKER_PEAKPLOT( ch_peaks, bioc_txdb, bioc_annot  )
+
+            CHIPSEEKER_ANNOTATE( ch_peaks, bioc_txdb, bioc_annot )
+            CHIPSEEKER_ANNOTATE.out.annot.collect() | CHIPSEEKER_PLOTLIST
+            ch_plots = ch_plots.mix(
+                CHIPSEEKER_PLOTLIST.out.plots
+            )
+
+        }
+        if (params.run.homer) {
+            HOMER_MOTIFS(ch_peaks.combine(genome_fasta),
+                         params.homer.de_novo,
+                         file(params.homer.jaspar_db, checkIfExists: true)
+                        )
+
+            if (params.run.meme && meme_motifs) {
+                MEME_AME(HOMER_MOTIFS.out.ame,
+                         meme_motifs
+                        )
+            }
+        }
+
+    emit:
+        plots = ch_plots
+}
diff --git a/subworkflows/local/peaks.nf b/subworkflows/local/peaks.nf
@@ -16,12 +16,6 @@ include { CALC_GENOME_FRAC
           CONCAT_PEAK_META
           PLOT_PEAK_WIDTHS    } from "../../modules/local/peaks.nf"
 include { BAM_TO_BED          } from "../../modules/local/bedtools.nf"
-include { CONSENSUS_PEAKS     } from "../../modules/local/consensus_peaks"
-include { HOMER_MOTIFS        } from "../../modules/local/homer"
-include { MEME_AME            } from "../../modules/local/meme"
-include { CHIPSEEKER_ANNOTATE } from "../../modules/local/chipseeker/annotate"
-include { CHIPSEEKER_PLOTLIST } from "../../modules/local/chipseeker/plotlist"
-include { CHIPSEEKER_PEAKPLOT } from "../../modules/local/chipseeker/peakplot"
 
 workflow CALL_PEAKS {
     take:
@@ -31,10 +25,6 @@ workflow CALL_PEAKS {
         deduped_bam
         frag_lengths
         effective_genome_size
-        genome_fasta
-        meme_motifs
-        bioc_txdb
-        bioc_annot
 
     main:
         genome_frac = CALC_GENOME_FRAC(chrom_sizes, effective_genome_size)
@@ -134,51 +124,7 @@ workflow CALL_PEAKS {
             .mix(PLOT_JACCARD.out)
             .mix(PLOT_PEAK_WIDTHS.out)
 
-        // consensus peak calling on replicates
-        ch_peaks
-            .map{ meta, bed, tool ->
-                [ "${meta.sample_basename}_${tool}", meta, bed, tool,]
-            }
-            .groupTuple(by: 0) // group by the sample_basename and peak-calling tool
-            .set{ peak_reps }
-        // assert that sample_basenames & tools match
-        peak_reps.subscribe { basename_tool, metas, beds, tools ->
-            assert metas.collect{ it.sample_basename }.toSet().size() == 1
-            assert tools.toSet().size() == 1
-        }
-        peak_reps
-            .map { basename_tool, metas, beds, tools ->
-                [ [id: metas[0].sample_basename, group: tools[0]], beds ]
-            }
-            .set{
-                peaks_grouped
-            }
-        peaks_grouped | CONSENSUS_PEAKS
-        ch_consensus_peaks = CONSENSUS_PEAKS.out.peaks
-
-        if (params.run.chipseeker && bioc_txdb && bioc_annot) {
-            // TODO: change consensus peak method to keep p-value, q-value, etc for use in chipseeker peakplots
-            CHIPSEEKER_PEAKPLOT( ch_peaks, bioc_txdb, bioc_annot  )
-            CHIPSEEKER_ANNOTATE( ch_consensus_peaks, bioc_txdb, bioc_annot )
-            CHIPSEEKER_ANNOTATE.out.annot.collect() | CHIPSEEKER_PLOTLIST
-            ch_plots = ch_plots.mix(
-                CHIPSEEKER_PLOTLIST.out.plots
-            )
-        }
-        if (params.run.homer) {
-            HOMER_MOTIFS(ch_consensus_peaks.combine(genome_fasta),
-                         params.homer.de_novo,
-                         file(params.homer.jaspar_db, checkIfExists: true)
-                        )
-
-            if (params.run.meme && meme_motifs) {
-                MEME_AME(HOMER_MOTIFS.out.ame,
-                         meme_motifs
-                        )
-            }
-        }
-
     emit:
-        peaks = ch_bam_peaks
+        peaks = ch_peaks
         plots = ch_plots
 }