Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move code to dsl2 #1

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 138 additions & 113 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,147 +1,105 @@
#!/usr/bin/env nextflow


//help function for the tool
def show_help (){
log.info IARC_Header()
log.info tool_header()
log.info"""
Usage:
The typical command for running the pipeline is as follows:
nextflow run iarc/bam2cram --bams input_folder --fasta ref.fa --fai ref.fa.fai -profile singularity
Mandatory arguments:
--bams [directory] input folder with BAM files and indexes
References
--fasta [file] Path to fasta reference to encode the CRAM file
--fai [file] Path to fasta reference index.
Input alternatives:
--bam_csv file with tabular data for each sample to process [label bam index ]
-profile [str] Configuration profile to use.
Available: singularity
Output:
--output_folder ouput folder [def: ./results]
""".stripIndent()
}

// we star coding the pipeline
params.help = null
params.bams = null
params.bam_csv = null
params.fasta= null
params.fai = null

// Show help message
if (params.help) exit 0, show_help()

//print header and tool name
log.info IARC_Header()
log.info tool_header()

//check fasta and fasta index
if (!params.fasta || !params.fai) exit 1, "The reference fasta file with its index need to be provided!"

//we check the reference
ch_fasta = Channel.value(file(params.fasta)).ifEmpty{exit 1, "Fasta file not found: ${params.fasta}"}
ch_fai = Channel.value(file(params.fai)).ifEmpty{exit 1, "fai index file not found: ${params.fai}"}
//check that BAM input is provided!
if(!params.bam_csv && !params.bams) exit 1, "No --bam_csv nor --bams options provided!"

//we print the parameters
log.info "\n"
log.info "-\033[2m------------------Calling PARAMETERS--------------------\033[0m-"
log.info params.collect{ k,v -> "${k.padRight(18)}: $v"}.join("\n")
log.info "-\033[2m--------------------------------------------------------\033[0m-"
log.info "\n"

if(params.bam_csv) {
Channel.fromPath(file(params.bam_csv)).splitCsv(header: true, sep: '\t', strip: true)
.map{row -> [ row.label, file(row.bam), file(row.index)]}
.ifEmpty{exit 1, "params.bams_csv was empty - no input files supplied" }
.into { inputbams; bamstats; sizebams }

}else{
if(file(params.bams).listFiles().findAll { it.name ==~ /.*bam/ }.size() > 0){
bams = Channel.fromPath( params.bams+'/*.bam' )
.map {path -> [ path.name.replace(".bam",""),path]}
bams_index = Channel.fromPath( params.bams+'/*.bam.bai')
.map { path -> [ path.name.replace(".bam.bai",""), path ] }
//we create the chanel
files = bams.join(bams_index)
files.into {inputbams; bamstats; sizebams}
}else{
println "ERROR: input folder ${params.bams} contains no BAM files"; System.exit(1)
}
}



//map the rna-seq reads to the genome

process bam2cram{

tag "${sample}-2cram"
//we can remove this to don't keep the bam files
publishDir "${params.output_folder}/CRAM", mode: 'copy'
publishDir "${params.outdir}/CRAM", mode: 'copy'

input:
set val(sample), file(bam), file(bam_index) from inputbams
file(ref) from ch_fasta
file(ref_index) from ch_fai
tuple val(sample), path(bam), path(bam_index)
path ref
path ref_index

output:
//output CRAM files
set val(sample), file("${sample}.cram"), file("${sample}.cram.crai") into cramfiles, cramstats, sizecrams
tuple val(sample), file("${sample}.cram"), file("${sample}.cram.crai"), emit: crams

script:

if(params.debug == true){
"""
echo samtools view -C -T ${ref} ${bam} -o ${sample}.cram
echo samtools index ${sample}.cram
touch ${sample}.cram ${sample}.cram.crai
"""
}else{
"""
samtools view -C -T ${ref} ${bam} -o ${sample}.cram
samtools index ${sample}.cram
samtools view -C -T ${ref} ${bam} -o ${sample}.cram
samtools index ${sample}.cram
"""
}
}


//we compute some stat for original bam files
process stats_bams{
tag "${sample}-bam_stats"
publishDir "${params.output_folder}/qc/bam", mode: 'copy'
publishDir "${params.outdir}/qc/bam", mode: 'copy'
input:
set val(sample), file(bam), file(bam_index) from bamstats
tuple val(sample), file(bam), file(bam_index)
output:
set val(sample), file("${sample}.bam.flagstat"), file("${sample}.bam.stats") into bam_qc
tuple val(sample), file("${sample}.bam.flagstat"), file("${sample}.bam.stats"), emit: bam_qc
script:
if(params.debug == true){
"""
echo samtools flagstat ${bam} > ${sample}.bam.flagstat
echo samtools stats ${bam} > ${sample}.bam.stats
touch ${sample}.bam.flagstat ${sample}.bam.stats
"""
}else{
"""
samtools flagstat ${bam} > ${sample}.bam.flagstat
samtools stats ${bam} > ${sample}.bam.stats
"""
}
}


//we compute some stat for original bam files
process stats_crams{
tag "${sample}-cram_stats"

publishDir "${params.output_folder}/qc/cram", mode: 'copy'
publishDir "${params.outdir}/qc/cram", mode: 'copy'
input:
set val(sample), file(cram), file(cram_index) from cramstats
tuple val(sample), file(cram), file(cram_index)
output:
set val(sample), file("${sample}.cram.flagstat"), file("${sample}.cram.stats") into cram_qc
tuple val(sample), file("${sample}.cram.flagstat"), file("${sample}.cram.stats"), emit: cram_qc
script:
if(params.debug == true){
"""
echo samtools flagstat ${cram} > ${sample}.cram.flagstat
echo samtools stats ${cram} > ${sample}.cram.stats
touch ${sample}.cram.flagstat
touch ${sample}.cram.stats
"""

}else{
"""
samtools flagstat ${cram} > ${sample}.cram.flagstat
samtools stats ${cram} > ${sample}.cram.stats
"""
}
}


//we join bam and cram qc output to compare them
cram_bam_size=sizecrams.join(sizebams)
cram_bam_qc=cram_qc.join(bam_qc)
//cram_bam_size=sizecrams.join(sizebams)
//cram_bam_qc=cram_qc.join(bam_qc)

//we merge both cram_qc and bam_qc to check if they are identical
process check_conversion{
tag "${sample}-check"
publishDir "${params.output_folder}/qc/check", mode: 'copy'
publishDir "${params.outdir}/qc/check", mode: 'copy'

input:
set val(sample), file(c_fs), file(c_stat), file(b_fs), file (b_stat) from cram_bam_qc
set val(sample), file(cram), file(cram_index), file(bam), file (bam_index) from cram_bam_size
//file qc_check from cram_bam_qc.collect()
tuple val(sample), file(c_fs), file(c_stat), file(b_fs), file (b_stat)
tuple val(sample), file(cram), file(cram_index), file(bam), file (bam_index)

output:
file("${sample}_check.report.txt") into report_qc

path("${sample}_check.report.txt") , emit: report_qc

script:
"""
if diff ${c_fs} ${b_fs} > /dev/null
Expand All @@ -168,29 +126,96 @@ process check_conversion{
"""
}

report_qc.collectFile(name: 'bam2cram_summary.txt', storeDir: params.output_folder, seed: 'ID\tflagstat\tstats\tBAM_size\tCRAM_size\n', newLine: false, skip: 1)
//report_qc.collectFile(name: 'bam2cram_summary.txt', storeDir: params.output_folder, seed: 'ID\tflagstat\tstats\tBAM_size\tCRAM_size\n', newLine: false, skip: 1)

workflow{


// Show help message
if (params.help) exit 0, show_help()


//print header and tool name
DGL_Header()
tool_header()

//check fasta and fasta index
if (!params.fasta || !params.fai) exit 1, "The reference fasta file with its fai index have to be provided!"

//we check the reference
//ch_fasta = Channel.fromPath(params.fasta).ifEmpty{exit 1, "Fasta file not found: ${params.fasta}"}
//ch_fai = Channel.fromPath(params.fai).ifEmpty{exit 1, "fai index file not found: ${params.fai}"}

ch_fasta = file(params.fasta)
ch_fai = file(params.fai)
//check that BAM input is provided!
if(!params.bam_csv && !params.bams) exit 1, "No --bam_csv nor --bams options provided!"

//we print the parameters
log.info "\n"
log.info "-------------------Calling PARAMETERS--------------------"
log.info params.collect{ k,v -> "${k.padRight(18)}: $v"}.join("\n")
log.info "--------------------------------------------------------"
log.info "\n"
if(params.bam_csv){
read_bams_ch=Channel.fromPath(params.bam_csv) \
| splitCsv(header:true) \
| map { row-> tuple(row.sampleId, file(row.bam), file(row.bai))}
}
B2C=bam2cram(read_bams_ch,ch_fasta,ch_fai)
BMS=stats_bams(read_bams_ch)
CMS=stats_crams(B2C.crams)
//we check conversion
CRC=check_conversion(CMS.join(BMS),B2C.crams.join(read_bams_ch))
//we collect all stats
CRC.report_qc.collectFile(name: 'bam2cram_summary.txt', storeDir: params.outdir, seed: 'ID\tflagstat\tstats\tBAM_size\tCRAM_size\n', newLine: false, skip: 1)
}


//help function for the tool
def show_help (){
DGL_Header()
tool_header()
log.info"""
Usage:
The typical command for running the pipeline is as follows:
nextflow run digenomalab/bam2cram --bam_csv bams.csv --fasta ref.fa --fai ref.fa.fai -profile kutral
Mandatory arguments:
--bam_csv file with tabular data for each sample to process [label bam index ]

References
--fasta [file] Path to fasta reference to encode the CRAM file
--fai [file] Path to fasta reference index.
Input alternatives:
-profile [str] Configuration profile to use.
Available: kutral
Output:
--outdir ouput folder [def: ./results]
""".stripIndent()
}


//header for the IARC tools
// header for the Di Genoma Lab tools
// the logo was generated using the following page
// http://patorjk.com/software/taag (ANSI logo generator)
def IARC_Header (){
return """
#################################################################################
# ██╗ █████╗ ██████╗ ██████╗██████╗ ██╗ ██████╗ ██╗███╗ ██╗███████╗ ██████╗ #
# ██║██╔══██╗██╔══██╗██╔════╝██╔══██╗██║██╔═══██╗██║████╗ ██║██╔════╝██╔═══██╗ #
# ██║███████║██████╔╝██║ ██████╔╝██║██║ ██║██║██╔██╗ ██║█████╗ ██║ ██║ #
# ██║██╔══██║██╔══██╗██║ ██╔══██╗██║██║ ██║██║██║╚██╗██║██╔══╝ ██║ ██║ #
# ██║██║ ██║██║ ██║╚██████╗██████╔╝██║╚██████╔╝██║██║ ╚████║██║ ╚██████╔╝ #
# ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═════╝ ╚═╝ ╚═════╝ ╚═╝╚═╝ ╚═══╝╚═╝ ╚═════╝ #
# Nextflow pipelines for cancer genomics.########################################
"""
def DGL_Header (){

log.info"""
#########################################################################
# _____ _ _____ _ _ #
# | __ \\(_)/ ____| | | | | #
# | | | |_| | __ ___ _ __ ___ _ __ ___ __ _| | __ _| |__ #
# | | | | | | |_ |/ _ \\ '_ \\ / _ \\| '_ ` _ \\ / _` | | / _` | '_ \\ #
# | |__| | | |__| | __/ | | | (_) | | | | | | (_| | |___| (_| | |_) | #
# |_____/|_|\\_____|\\___|_| |_|\\___/|_| |_| |_|\\__,_|______\\__,_|_.__/ #
# Workflows for genomics.################################################
""".stripIndent()
}

//this use ANSI colors to make a short tool description
//useful url: http://www.lihaoyi.com/post/BuildyourownCommandLinewithANSIescapecodes.html
def tool_header (){
return """
def tool_header(){
log.info"""
BAM\u001b[32;1m 2\u001b[33;1m CRAM\u001b[31;1m (${workflow.manifest.version})
"""
""".stripIndent()
}
44 changes: 42 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@ manifest {
homePage = 'https://github.com/adigenova/bam2cram'
description = 'bam to cram conversion'
mainScript = 'main.nf'
version = 1.0
version = 2.0
}

params{
// we star coding the pipeline
help = null
bams = null
bam_csv = null
fasta= null
fai = null
debug= false
outdir= "results"
}

profiles {
Expand All @@ -20,10 +31,39 @@ profiles {
process.container = 'adigenova/bam2cram:v1.0'
pullTimeout = "200 min"
}
kutral{
singularity.enabled = true
singularity.autoMonts = true
process.container = 'adigenova/bam2cram:v1.0'
pullTimeout = "200 min"
singularity.runOptions = ' --bind /mnt/beegfs/labs/ '
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
process.executor = 'slurm'
process.queue = 'uohhm'
}
}

process {
shell = ['/bin/bash','-o','pipefail']
//process requirements for running
withName: bam2cram {
memory = 10.GB
cpus = 1
}
withName: stats_bams{
memory = 10.GB
cpus = 1
}
withName: stats_crams{
memory = 10.GB
cpus = 1
}
withName: check_conversion{
memory = 10.GB
cpus = 1
}
}

params.output_folder = "./results"
11 changes: 0 additions & 11 deletions test/makefile

This file was deleted.