Skip to content

Commit

Permalink
Merge pull request #2 from GenomiqueENS/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Salome-Brunon authored Nov 18, 2024
2 parents 66b70ee + 0c1d7a6 commit 1e12af7
Show file tree
Hide file tree
Showing 12 changed files with 70 additions and 71 deletions.
13 changes: 6 additions & 7 deletions bin/samplesheet2yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys

# Function to convert CSV to the exact YAML structure
def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None):
def csv_to_exact_yaml(csv_file, yaml_file):
data = {}

# Reading the CSV file and grouping data by 'condition'
Expand All @@ -14,8 +14,8 @@ def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None):
if condition not in data:
data[condition] = {"long read files": [], "labels": []}

# Append the full path if path_prefix is provided
bam_file = f"{path_prefix}/{row['fastq']}.bam" if path_prefix else row['fastq']
# Append .bam to the filename
bam_file = f"{row['fastq']}.bam"
label = f"Sample{row['sample']}"

data[condition]["long read files"].append(bam_file)
Expand Down Expand Up @@ -48,14 +48,13 @@ def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None):
# Main function to handle command-line arguments
if __name__ == "__main__":
# Argument parsing
parser = argparse.ArgumentParser(description="Convert CSV to YAML and update bam file paths")
parser = argparse.ArgumentParser(description="Convert CSV to YAML and append .bam to file names")
parser.add_argument('--input', required=True, help="Input CSV file")
parser.add_argument('--output', required=True, help="Output YAML file")
parser.add_argument('--path', help="Optional path to prepend to 'bam' column values")

args = parser.parse_args()

# Convert the CSV to the YAML structure, appending the full path to 'bam' if provided
csv_to_exact_yaml(args.input, args.output, args.path)
# Convert the CSV to the YAML structure
csv_to_exact_yaml(args.input, args.output)

print(f"YAML file has been created: {args.output}")
27 changes: 8 additions & 19 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
*/

nextflow.enable.dsl=2
if ( params.help ) {
params.help = false

if ( params.help) {
help = """
Usage:
nextflow run main.nf --reads <path> --samplesheet <path> [options]
Expand Down Expand Up @@ -39,18 +41,6 @@ if ( params.help ) {
exit(0)
}

// Display pipeline details
println """\
T R A N S C R I P T - A N N O T A T I O N - N F P I P E L I N E
===================================
orientation : ${params.oriented}
fastq : ${params.reads}
sam : ${params.sam}
genome : ${params.genome}
annotation : ${params.annotation}
outdir : ${params.outdir}
"""
.stripIndent()

/*
========================================================================================
Expand All @@ -67,7 +57,6 @@ include { NONORIENTED_WORKFLOW } from './subworkflows/nonoriented_annotati
*/

workflow{
genome_ch = file( params.genome )
annot_ch = file( params.annotation )
config_ch = file( params.config, checkIfExists:true )
shortread_ch = params.optional_shortread != null ? file(params.optional_shortread, type: "file") : file("no_shortread", type: "file")
Expand All @@ -77,8 +66,7 @@ workflow{

if (params.oriented == false) {

NONORIENTED_WORKFLOW(genome_ch,
annot_ch,
NONORIENTED_WORKFLOW(annot_ch,
config_ch,
shortread_ch,
junc_bed_ch,
Expand All @@ -87,8 +75,7 @@ workflow{
} else if (params.oriented == true) {
sam_ch = Channel.fromPath( params.sam, checkIfExists:true )

ORIENTED_WORKFLOW(genome_ch,
annot_ch,
ORIENTED_WORKFLOW(annot_ch,
config_ch,
shortread_ch,
junc_bed_ch,
Expand Down Expand Up @@ -129,9 +116,11 @@ log.info """\
junction bed files minimap2 : ${params.junc_bed}
IsoQuant model strategy : ${params.model_strategy}
RNABloom short read polishing data : ${params.optional_shortread}
gffread parameters : ${params.gffread_parameters}
outdir : ${params.outdir}
"""
.stripIndent()

/*
========================================================================================
========================================================================================
*/
3 changes: 2 additions & 1 deletion modules/gffread.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ process GFFREAD {
input:
path genome
tuple val(condition), path(polished_gtf)
val gffread_parameters

output:
tuple val(condition), path("${condition}.transcripts_polished_clustersMKZ.gff3"), emit: gffread_gff3
Expand All @@ -27,6 +28,6 @@ process GFFREAD {
"""
gffread -g ${genome} \
-o ${condition}.transcripts_polished_clustersMKZ.gff3 \
-M -K -Z ${polished_gtf} \
${gffread_parameters} ${polished_gtf}
"""
}
5 changes: 4 additions & 1 deletion modules/isoquant.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
process ISOQUANT {

// where to store the results and in which way
cpus 24
cpus 16
maxForks 1

publishDir( "${params.outdir}", mode: 'copy' )

// show in the log which input file is analysed
Expand All @@ -20,6 +22,7 @@ process ISOQUANT {

input:
val ready
path bams
path genome
path samplesheet
val model_strategy
Expand Down
4 changes: 4 additions & 0 deletions modules/merge_fastq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ process MERGE_FASTQ_RESTRANDER {
// where to store the results and in which way
debug true
publishDir( "${params.outdir}/rnabloom", mode: 'copy' )

tag( "${reads}" )

input:
path samplesheet
Expand All @@ -29,6 +31,8 @@ process MERGE_FASTQ_EOULSAN {
// where to store the results and in which way
debug true
publishDir( "${params.outdir}/rnabloom", mode: 'copy' )

tag( "${reads}" )

input:
path samplesheet
Expand Down
2 changes: 1 addition & 1 deletion modules/rnabloom.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
process RNA_BLOOM {
// where to store the results and in which way
debug true
cpus 24
cpus 16
maxForks 1
maxRetries 2

Expand Down
7 changes: 5 additions & 2 deletions modules/rnabloom_minimap2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,17 @@ process RNABLOOM_MINIMAP2 {
input:
path genome
tuple val(condition), path(bloomfasta)
val intron_length
path junc_bed

output:
tuple val(condition), path( "${bloomfasta.SimpleName}.sam" ), emit: rnabloom_sam

script:
def junc_bed_arg = junc_bed.name != 'no_junc_bed' ? "--junc-bed $junc_bed" : ""
"""
minimap2 -ax splice -uf -k14 \
${genome} ${bloomfasta} > ${bloomfasta.SimpleName}.sam
minimap2 -G ${intron_length} -ax splice -uf -k14 \
${junc_bed_arg} ${genome} ${bloomfasta} > ${bloomfasta.SimpleName}.sam
"""

}
2 changes: 1 addition & 1 deletion modules/samplesheet2yaml.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ process SAMPLESHEET2YAML {

script:
"""
python3 $projectDir/bin/samplesheet2yaml.py --input ${samplesheet} --output dataset.yaml --path ${params.outdir}/bam
python3 $projectDir/bin/samplesheet2yaml.py --input ${samplesheet} --output dataset.yaml
"""
}
3 changes: 1 addition & 2 deletions modules/samtools.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ process SAMTOOLS {
path(sam)

output:
path("${sam.SimpleName}.bam"), emit: samtools_bam
path("${sam.SimpleName}.bam.bai")
tuple path("${sam.SimpleName}.bam"), path("${sam.SimpleName}.bam.bai"), emit: samtools_bam
val("process_complete"), emit: process_control

script:
Expand Down
21 changes: 3 additions & 18 deletions modules/uncompress_files.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,12 @@ process UNCOMPRESS_GENOME {
path genome

output:
path( "*" ), emit: genome_isoquant
path( "*" ), emit: genome_gffread
path( "${genome.BaseName}" ), emit: genome_isoquant
path( "${genome.BaseName}" ), emit: genome_minimap2
path( "${genome.BaseName}" ), emit: genome_gffread

script:
"""
bzip2 -dc ${genome} > ${genome.BaseName}
"""
}

// Decompress an annotation file with bzip2.
// NOTE(review): assumes the input is bzip2-compressed; the name of the
// decompressed file is derived by bzip2 from the input name — confirm.
process UNCOMPRESS_ANNOTATION {
debug true
// Published files are copied into the "ressources" sub-directory of params.outdir
publishDir( "${params.outdir}/ressources", mode: 'copy' )

input:
// Annotation file to decompress
path annotation

output:
// Files matching "*" in the task directory, emitted on the `annotation_merge` channel
path( "*" ), emit: annotation_merge

script:
// bzip2 flags: -d decompress, -k keep the compressed input file
"""
bzip2 -dk ${annotation}
"""
}
27 changes: 15 additions & 12 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,37 @@

params {
// Input options
reads = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/achilles/*.fastq.gz"
samplesheet = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/samplesheet.csv"
reads = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.fastq"
samplesheet = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/samplesheet.csv"

// References
//genome = "${launchDir}/data/hdujardini_HiC"
//annotation = "${launchDir}/data/hdujardini_HiC"
genome = "/import/rhodos10/ressources/sequencages/genomes/morphoach1.fa.bz2"
annotation = "/home/brunon/shares-net/sequencages/ressources/annotations/morphoach1.gff.bz2"
genome = "/import/rhodos10/ressources/sequencages/genomes/morphoach1.fa.bz2"
annotation = "/import/rhodos10/ressources/sequencages/annotations/morphoach1.gff.bz2"

// Orientation of FASTQ files
oriented = true // if reads already oriented, replace with oriented = true
sam = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.sam" // if oriented = true, provide sam files from eoulsan
oriented = true // if reads already oriented, replace with oriented = true
sam = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.sam" // if oriented = true, provide sam files from eoulsan

// Restrander configuration file (TSO and RTP sequences)
config = "${launchDir}/assets/PCB111.json"
config = "${launchDir}/assets/PCB111.json"

// GFFRead input parameters
gffread_parameters = "-M"

// Minimap2 intron length
intron_length = "20000" // passed to minimap2 -G (max intron length); minimap2's own default is 200k
junc_bed = null // junction BED file for minimap2; if none, keep junc_bed = null
intron_length = "20000" // passed to minimap2 -G (max intron length); minimap2's own default is 200k
junc_bed = null // junction BED file for minimap2; if none, keep junc_bed = null

// IsoQuant module input parameters
model_strategy = "default_ont"
model_strategy = "default_ont"

// RNABloom input options
optional_shortread = null // if no short reads, replace with optional_shortread = null
optional_shortread = null // if no short reads, replace with optional_shortread = null

// Output directory
outdir = "${launchDir}/result/achilles"
outdir = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/result"
}

docker {
Expand Down
27 changes: 20 additions & 7 deletions subworkflows/oriented_annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@ include { RNABLOOM_PAFTOOLS }
include { RNABLOOM_AGAT_BED2GFF; RNABLOOM_AGAT_GFF2GTF; AGAT_COMPLEMENT; MERGE_AGAT_GFF2GTF } from '../modules/agat.nf'
include { SAMPLESHEET2YAML } from '../modules/samplesheet2yaml.nf'
include { SAMTOOLS } from '../modules/samtools.nf'
include { UNCOMPRESS_GENOME; UNCOMPRESS_ANNOTATION } from '../modules/uncompress_files.nf'
include { UNCOMPRESS_GENOME } from '../modules/uncompress_files.nf'

workflow ORIENTED_WORKFLOW {
take:
genome
annot
config
shortread
Expand All @@ -29,23 +28,37 @@ workflow ORIENTED_WORKFLOW {
reads

main:
// Prepare genome for different steps
ch_isoquant_genome = Channel.empty()
if (params.genome.endsWith('.gz')|| params.genome.endsWith(".bz2")){
genome_ch = file( params.genome )
UNCOMPRESS_GENOME(genome_ch)
ch_isoquant_genome = UNCOMPRESS_GENOME.out.genome_isoquant
ch_minimap2_genome = UNCOMPRESS_GENOME.out.genome_minimap2
ch_gffread_genome = UNCOMPRESS_GENOME.out.genome_gffread
} else {
ch_isoquant_genome = file( params.genome )
ch_minimap2_genome = file( params.genome )
ch_gffread_genome = file( params.genome )
}

// Transcript annotation modules: IsoQuant
SAMTOOLS(sam)
SAMPLESHEET2YAML(samplesheet)
UNCOMPRESS_GENOME(genome)
ISOQUANT(SAMTOOLS.out.process_control.collect(), UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam.collect(), ch_isoquant_genome, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
ISOQUANT_CONDITION(ISOQUANT.out.isoquant_gtf.flatten())

// Transcript annotation modules: RNABloom
MERGE_FASTQ_EOULSAN(samplesheet, reads)
MERGE_FASTQ_EOULSAN(samplesheet, reads.collect())
RNA_BLOOM(MERGE_FASTQ_EOULSAN.out.merged_fastq.flatten(), shortread)
RNABLOOM_MINIMAP2(genome, RNA_BLOOM.out.rnabloom_fasta)
RNABLOOM_MINIMAP2(ch_minimap2_genome, RNA_BLOOM.out.rnabloom_fasta, params.intron_length, junc_bed)
RNABLOOM_PAFTOOLS(RNABLOOM_MINIMAP2.out.rnabloom_sam)
RNABLOOM_AGAT_BED2GFF(RNABLOOM_PAFTOOLS.out.rnabloom_bed)
RNABLOOM_AGAT_GFF2GTF(RNABLOOM_AGAT_BED2GFF.out.agat_gff)

// Merging of transcript annotations
AGAT_COMPLEMENT(ISOQUANT_CONDITION.out.isoquant_condition_gtf.join(RNABLOOM_AGAT_GFF2GTF.out.agat_gtf))
GFFREAD(UNCOMPRESS_GENOME.out.genome_gffread, AGAT_COMPLEMENT.out.polished_gtf)
GFFREAD(ch_gffread_genome, AGAT_COMPLEMENT.out.polished_gtf, params.gffread_parameters)
MERGE_AGAT_GFF2GTF(GFFREAD.out.gffread_gff3)
MERGE_ANNOTATION(annot, MERGE_AGAT_GFF2GTF.out.merged_agat_gtf)
}

0 comments on commit 1e12af7

Please sign in to comment.