Skip to content

Commit

Permalink
refactor: use consensus_peaks subwf from nf-modules
Browse files Browse the repository at this point in the history
  • Loading branch information
kelly-sovacool committed Dec 5, 2023
1 parent b8450a1 commit 465b361
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 128 deletions.
51 changes: 27 additions & 24 deletions bin/chipseeker_peakplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,39 +22,42 @@ txdb <- args$txdb %>%
rlang::sym() %>%
eval()

np <- read.table(args$peak, sep = "\t")
peak_colnames <- c(
"chrom",
"chromStart",
"chromEnd",
"name",
"score",
"strand",
"signalValue",
"pValue",
"qValue"
)
num_columns <- ncol(np)
if (num_columns == 9) {
colnames(np) <- peak_colnames
np <- np %>% mutate(peak = NA)
} else if (num_columns == 10) {
colnames(np) <- c(peak_colnames, "peak")
} else {
stop(paste("Expected 9 or 10 columns in peak file, but", num_columns, "given"))
read_peaks <- function(peak_file) {
peak_colnames <- c(
"chrom",
"start",
"end",
"peakID",
"score",
"strand",
"signal",
"pvalue",
"qvalue",
"peak"
)
peaks <- read.table(peak_file, sep = "\t")
if (!dplyr::between(ncol(peaks), 9, 10)) {
stop("Unexpected number of columns in peak file")
}
colnames(peaks) <- peak_colnames[seq_len(ncol(peaks))]
return(peaks)
}
np <- read_peaks(args$peak)

# plots for individual peak file
peaks <- GenomicRanges::GRanges(
seqnames = np$chrom,
ranges = IRanges(np$chromStart, np$chromEnd),
qValue = np$qValue
ranges = IRanges(np$start, np$end),
pvalue = np$pvalue,
qvalue = np$qvalue
)
print(peaks)
plots <- list(
covplot = covplot(peaks, weightCol = "qValue"),
covplot = covplot(peaks, weightCol = "qvalue"),
plotPeakProf2 = plotPeakProf2(
peak = peaks, upstream = rel(0.2), downstream = rel(0.2),
conf = 0.95, by = "gene", type = "body", nbin = 800,
TxDb = txdb, weightCol = "qValue", ignore_strand = F
TxDb = txdb, weightCol = "qvalue", ignore_strand = TRUE
)
)

Expand Down
27 changes: 19 additions & 8 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ log.info """\

include { INPUT_CHECK } from './subworkflows/local/input_check.nf'
include { PREPARE_GENOME } from './subworkflows/local/prepare_genome.nf'
include { FILTER_BLACKLIST } from './subworkflows/CCBR/filter_blacklist'
include { FILTER_BLACKLIST } from './subworkflows/CCBR/filter_blacklist/'
include { ALIGN_GENOME } from "./subworkflows/local/align.nf"
include { DEDUPLICATE } from "./subworkflows/local/deduplicate.nf"
include { QC } from './subworkflows/local/qc.nf'
include { CALL_PEAKS } from './subworkflows/local/peaks.nf'

include { CONSENSUS_PEAKS } from './subworkflows/CCBR/consensus_peaks/'
include { ANNOTATE } from './subworkflows/local/annotate.nf'

// MODULES
include { CUTADAPT } from "./modules/CCBR/cutadapt"
Expand Down Expand Up @@ -92,13 +93,23 @@ workflow CHIPSEQ {
deduped_tagalign,
deduped_bam,
frag_lengths,
effective_genome_size,
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.meme_motifs,
PREPARE_GENOME.out.bioc_txdb,
PREPARE_GENOME.out.bioc_annot
effective_genome_size
)
ch_multiqc = ch_multiqc.mix(CALL_PEAKS.out.plots)

// consensus peak calling on replicates
ch_peaks_grouped = CALL_PEAKS.out.peaks
.map{ meta, bed, tool ->
[ [ group: "${meta.sample_basename}.${tool}" ], bed ]
}
CONSENSUS_PEAKS( ch_peaks_grouped, params.run.normalize_peaks )

ANNOTATE(CONSENSUS_PEAKS.out.peaks,
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.meme_motifs,
PREPARE_GENOME.out.bioc_txdb,
PREPARE_GENOME.out.bioc_annot)
ch_multiqc = ch_multiqc.mix(CALL_PEAKS.out.plots, ANNOTATE.out.plots)

}

MULTIQC(
Expand Down
8 changes: 4 additions & 4 deletions modules/local/chipseeker/peakplot/main.nf
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
process CHIPSEEKER_PEAKPLOT {
tag "${meta.id}.${group}"
tag "${meta.id}"
label 'peaks'
label 'process_medium'

container 'nciccbr/ccbr_chipseeker:1.1.2'

input:
tuple val(meta), path(bed), val(group)
tuple val(meta), path(bed)
val(txdb)
val(annot_db)

Expand All @@ -17,7 +17,7 @@ process CHIPSEEKER_PEAKPLOT {
"""
chipseeker_peakplot.R \\
--peak ${bed} \\
--outfile-prefix ${meta.id}.${group} \\
--outfile-prefix ${meta.id} \\
--genome-txdb ${txdb} \\
--genome-annot ${annot_db}
"""
Expand All @@ -26,7 +26,7 @@ process CHIPSEEKER_PEAKPLOT {
"""
for ftype in annotated.txt summary.txt genelist.txt annotation.Rds .png
do
touch ${meta.id}.${group}.\${ftype}
touch ${meta.id}.\${ftype}
done
"""
}
31 changes: 0 additions & 31 deletions modules/local/consensus_peaks/main.nf

This file was deleted.

12 changes: 6 additions & 6 deletions modules/local/homer/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process HOMER_MOTIFS {
tag "${meta.id}.${meta.group}"
tag "${meta.id}"
label 'peaks'
label 'process_medium'

Expand All @@ -11,16 +11,16 @@ process HOMER_MOTIFS {
path(motif_db)

output:
tuple val(meta), path("${meta.id}.${meta.group}_homer/*"), emit: motifs
tuple val(meta), path("${meta.id}.${meta.group}_homer/background.fa"), path("${meta.id}.${meta.group}_homer/target.fa"), emit: ame
tuple val(meta), path("${meta.id}_homer/*"), emit: motifs
tuple val(meta), path("${meta.id}_homer/background.fa"), path("${meta.id}_homer/target.fa"), emit: ame

script:
def args = de_novo ? "" : " -nomotif "
"""
findMotifsGenome.pl \\
${bed} \\
${genome_fasta} \\
${meta.id}.${meta.group}_homer/ \\
${meta.id}_homer/ \\
-mknown ${motif_db} \\
-size given \\
-p ${task.cpus} \\
Expand All @@ -32,7 +32,7 @@ process HOMER_MOTIFS {

stub:
"""
mkdir ${meta.id}.${meta.group}_homer/
touch ${meta.id}.${meta.group}_homer/background.fa ${meta.id}.${meta.group}_homer/target.fa
mkdir ${meta.id}_homer/
touch ${meta.id}_homer/background.fa ${meta.id}_homer/target.fa
"""
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ params {
sicer = true
macs_broad = true
macs_narrow = true
normalize_peaks = false
chipseeker = true
homer = true
meme = true
Expand Down
43 changes: 43 additions & 0 deletions subworkflows/local/annotate.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
include { HOMER_MOTIFS } from "../../modules/local/homer"
include { MEME_AME } from "../../modules/local/meme"
include { CHIPSEEKER_ANNOTATE } from "../../modules/local/chipseeker/annotate"
include { CHIPSEEKER_PLOTLIST } from "../../modules/local/chipseeker/plotlist"
include { CHIPSEEKER_PEAKPLOT } from "../../modules/local/chipseeker/peakplot"

workflow ANNOTATE {
take:
ch_peaks
genome_fasta
meme_motifs
bioc_txdb
bioc_annot

main:
ch_plots = Channel.empty()
if (params.run.chipseeker &&
bioc_txdb && bioc_annot) {
CHIPSEEKER_PEAKPLOT( ch_peaks, bioc_txdb, bioc_annot )

CHIPSEEKER_ANNOTATE( ch_peaks, bioc_txdb, bioc_annot )
CHIPSEEKER_ANNOTATE.out.annot.collect() | CHIPSEEKER_PLOTLIST
ch_plots = ch_plots.mix(
CHIPSEEKER_PLOTLIST.out.plots
)

}
if (params.run.homer) {
HOMER_MOTIFS(ch_peaks.combine(genome_fasta),
params.homer.de_novo,
file(params.homer.jaspar_db, checkIfExists: true)
)

if (params.run.meme && meme_motifs) {
MEME_AME(HOMER_MOTIFS.out.ame,
meme_motifs
)
}
}

emit:
plots = ch_plots
}
56 changes: 1 addition & 55 deletions subworkflows/local/peaks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@ include { CALC_GENOME_FRAC
CONCAT_PEAK_META
PLOT_PEAK_WIDTHS } from "../../modules/local/peaks.nf"
include { BAM_TO_BED } from "../../modules/local/bedtools.nf"
include { CONSENSUS_PEAKS } from "../../modules/local/consensus_peaks"
include { HOMER_MOTIFS } from "../../modules/local/homer"
include { MEME_AME } from "../../modules/local/meme"
include { CHIPSEEKER_ANNOTATE } from "../../modules/local/chipseeker/annotate"
include { CHIPSEEKER_PLOTLIST } from "../../modules/local/chipseeker/plotlist"
include { CHIPSEEKER_PEAKPLOT } from "../../modules/local/chipseeker/peakplot"

workflow CALL_PEAKS {
take:
Expand All @@ -31,10 +25,6 @@ workflow CALL_PEAKS {
deduped_bam
frag_lengths
effective_genome_size
genome_fasta
meme_motifs
bioc_txdb
bioc_annot

main:
genome_frac = CALC_GENOME_FRAC(chrom_sizes, effective_genome_size)
Expand Down Expand Up @@ -134,51 +124,7 @@ workflow CALL_PEAKS {
.mix(PLOT_JACCARD.out)
.mix(PLOT_PEAK_WIDTHS.out)

// consensus peak calling on replicates
ch_peaks
.map{ meta, bed, tool ->
[ "${meta.sample_basename}_${tool}", meta, bed, tool,]
}
.groupTuple(by: 0) // group by the sample_basename and peak-calling tool
.set{ peak_reps }
// assert that sample_basenames & tools match
peak_reps.subscribe { basename_tool, metas, beds, tools ->
assert metas.collect{ it.sample_basename }.toSet().size() == 1
assert tools.toSet().size() == 1
}
peak_reps
.map { basename_tool, metas, beds, tools ->
[ [id: metas[0].sample_basename, group: tools[0]], beds ]
}
.set{
peaks_grouped
}
peaks_grouped | CONSENSUS_PEAKS
ch_consensus_peaks = CONSENSUS_PEAKS.out.peaks

if (params.run.chipseeker && bioc_txdb && bioc_annot) {
// TODO: change consensus peak method to keep p-value, q-value, etc for use in chipseeker peakplots
CHIPSEEKER_PEAKPLOT( ch_peaks, bioc_txdb, bioc_annot )
CHIPSEEKER_ANNOTATE( ch_consensus_peaks, bioc_txdb, bioc_annot )
CHIPSEEKER_ANNOTATE.out.annot.collect() | CHIPSEEKER_PLOTLIST
ch_plots = ch_plots.mix(
CHIPSEEKER_PLOTLIST.out.plots
)
}
if (params.run.homer) {
HOMER_MOTIFS(ch_consensus_peaks.combine(genome_fasta),
params.homer.de_novo,
file(params.homer.jaspar_db, checkIfExists: true)
)

if (params.run.meme && meme_motifs) {
MEME_AME(HOMER_MOTIFS.out.ame,
meme_motifs
)
}
}

emit:
peaks = ch_bam_peaks
peaks = ch_peaks
plots = ch_plots
}

0 comments on commit 465b361

Please sign in to comment.