From 1b7c279ce13ffe7e9f8fb636141edb06b1679ee6 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Wed, 30 Oct 2024 14:30:39 +0100 Subject: [PATCH 01/12] PRODUCTION: added CHANGES file for versionning --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 CHANGES.md diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..2b13766 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,4 @@ +# Changelog + +## 0.1 (2024-10-30) +* Initial version \ No newline at end of file From fe32bc03e647879970896fafe83fb14782e5cd05 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Tue, 5 Nov 2024 13:41:03 +0100 Subject: [PATCH 02/12] FIX: fixed issues with isoquant not finding bam --- bin/samplesheet2yaml.py | 13 ++++++------- modules/isoquant.nf | 3 +++ modules/samplesheet2yaml.nf | 2 +- nextflow.config | 24 ++++++++++++------------ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/bin/samplesheet2yaml.py b/bin/samplesheet2yaml.py index 4320463..115bccb 100644 --- a/bin/samplesheet2yaml.py +++ b/bin/samplesheet2yaml.py @@ -3,7 +3,7 @@ import sys # Function to convert CSV to the exact YAML structure -def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None): +def csv_to_exact_yaml(csv_file, yaml_file): data = {} # Reading the CSV file and grouping data by 'condition' @@ -14,8 +14,8 @@ def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None): if condition not in data: data[condition] = {"long read files": [], "labels": []} - # Append the full path if path_prefix is provided - bam_file = f"{path_prefix}/{row['fastq']}.bam" if path_prefix else row['fastq'] + # Append .bam to the filename + bam_file = f"{row['fastq']}.bam" label = f"Sample{row['sample']}" data[condition]["long read files"].append(bam_file) @@ -48,14 +48,13 @@ def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None): # Main function to handle command-line arguments if __name__ == "__main__": # Argument parsing - parser = argparse.ArgumentParser(description="Convert CSV to YAML and update bam file paths") + parser = argparse.ArgumentParser(description="Convert CSV to YAML and append .bam to file names") parser.add_argument('--input', required=True, help="Input CSV file") parser.add_argument('--output', required=True, help="Output YAML file") - parser.add_argument('--path', help="Optional path to prepend to 'bam' column values") args = parser.parse_args() - # Convert the CSV to the YAML structure, appending the full path to 'bam' if provided - csv_to_exact_yaml(args.input, args.output, args.path) + # Convert the CSV to the YAML structure + csv_to_exact_yaml(args.input, args.output) print(f"YAML file has been created: {args.output}") \ No newline at end of file diff --git a/modules/isoquant.nf b/modules/isoquant.nf index 87f7ee2..ef4db9c 100644 --- a/modules/isoquant.nf +++ b/modules/isoquant.nf @@ -12,6 +12,8 @@ process ISOQUANT { // where to store the results and in which way cpus 24 + maxForks 1 + publishDir( "${params.outdir}", mode: 'copy' ) // show in the log which input file is analysed @@ -20,6 +22,7 @@ process ISOQUANT { input: val ready + path bams path genome path samplesheet val model_strategy diff --git a/modules/samplesheet2yaml.nf b/modules/samplesheet2yaml.nf index cbf9bb6..8278f6f 100644 --- a/modules/samplesheet2yaml.nf +++ b/modules/samplesheet2yaml.nf @@ -25,6 +25,6 @@ process SAMPLESHEET2YAML { script: """ - python3 $projectDir/bin/samplesheet2yaml.py --input ${samplesheet} --output dataset.yaml --path ${params.outdir}/bam + python3 $projectDir/bin/samplesheet2yaml.py --input ${samplesheet} --output dataset.yaml """ } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 54b840f..8a37e49 100644 --- a/nextflow.config +++ b/nextflow.config @@ -8,34 +8,34 @@ params { // Input options - reads = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/achilles/*.fastq.gz" - samplesheet = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/samplesheet.csv" + reads = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/menelaus/*.fastq.gz" + samplesheet = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/samplesheet.csv" // References //genome = "${launchDir}/data/hdujardini_HiC" //annotation = "${launchDir}/data/hdujardini_HiC" - genome = "/import/rhodos10/ressources/sequencages/genomes/morphoach1.fa.bz2" - annotation = "/home/brunon/shares-net/sequencages/ressources/annotations/morphoach1.gff.bz2" + genome = "/import/rhodos10/ressources/sequencages/genomes/morphomen1.fa.bz2" + annotation = "/import/rhodos10/ressources/sequencages/annotations/morphomen1.gff.bz2" // Orientation of FASTQ files - oriented = true // if reads already oriented, replace with oriented = true - sam = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.sam" // if oriented = true, provide sam files from eoulsan + oriented = true // if reads already oriented, replace with oriented = true + sam = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/*.sam" // if oriented = true, provide sam files from eoulsan // Restrander configuration file (TSO and RTP sequences) - config = "${launchDir}/assets/PCB111.json" + config = "${launchDir}/assets/PCB111.json" // Minimap2 intron length - intron_length = "20000" // 200k by default - junc_bed = null // if no junk bed, replace with optional_shortread = null + intron_length = "20000" // 200k by default + junc_bed = null // if no junk bed, replace with optional_shortread = null // IsoQuant module input parameters - model_strategy = "default_ont" + model_strategy = "default_ont" // RNABloom input options - optional_shortread = null // if no short reads, replace with optional_shortread = null + optional_shortread = null // if no short reads, replace with optional_shortread = null // Output directory - outdir = "${launchDir}/result/achilles" + outdir = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/result" } docker { From e481615cbb22692062016c53f48b3bceb010ca7e Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Tue, 5 Nov 2024 13:42:09 +0100 Subject: [PATCH 03/12] FIX: add fastq.collect for merge_fastq module --- modules/merge_fastq.nf | 4 ++++ subworkflows/oriented_annotation.nf | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/merge_fastq.nf b/modules/merge_fastq.nf index 4efe9c9..77d6e7b 100644 --- a/modules/merge_fastq.nf +++ b/modules/merge_fastq.nf @@ -10,6 +10,8 @@ process MERGE_FASTQ_RESTRANDER { // where to store the results and in which way debug true publishDir( "${params.outdir}/rnabloom", mode: 'copy' ) + + tag( "${reads}" ) input: path samplesheet @@ -29,6 +31,8 @@ process MERGE_FASTQ_EOULSAN { // where to store the results and in which way debug true publishDir( "${params.outdir}/rnabloom", mode: 'copy' ) + + tag( "${reads}" ) input: path samplesheet diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf index cb2437e..ff84c10 100644 --- a/subworkflows/oriented_annotation.nf +++ b/subworkflows/oriented_annotation.nf @@ -32,11 +32,11 @@ workflow ORIENTED_WORKFLOW { SAMTOOLS(sam) SAMPLESHEET2YAML(samplesheet) UNCOMPRESS_GENOME(genome) - ISOQUANT(SAMTOOLS.out.process_control.collect(), UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy) + ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam, UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy) ISOQUANT_CONDITION(ISOQUANT.out.isoquant_gtf.flatten()) // Transcript annotation modules: RNABloom - MERGE_FASTQ_EOULSAN(samplesheet, reads) + MERGE_FASTQ_EOULSAN(samplesheet, reads.collect()) RNA_BLOOM(MERGE_FASTQ_EOULSAN.out.merged_fastq.flatten(), shortread) RNABLOOM_MINIMAP2(genome, RNA_BLOOM.out.rnabloom_fasta) RNABLOOM_PAFTOOLS(RNABLOOM_MINIMAP2.out.rnabloom_sam) From ad6a548c7ee5d083725b771a75818f6556a267a4 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Tue, 5 Nov 2024 13:45:43 +0100 Subject: [PATCH 04/12] FIX: added samtools_bam.collect to resolve missing bam --- subworkflows/oriented_annotation.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf index ff84c10..7908bb6 100644 --- a/subworkflows/oriented_annotation.nf +++ b/subworkflows/oriented_annotation.nf @@ -32,7 +32,7 @@ workflow ORIENTED_WORKFLOW { SAMTOOLS(sam) SAMPLESHEET2YAML(samplesheet) UNCOMPRESS_GENOME(genome) - ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam, UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy) + ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam.collect(), UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy) ISOQUANT_CONDITION(ISOQUANT.out.isoquant_gtf.flatten()) // Transcript annotation modules: RNABloom From 4872929f47a74c1954fef6afcc763fdd0432a8f1 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Tue, 5 Nov 2024 13:46:40 +0100 Subject: [PATCH 05/12] FIX: added tuple .bam .bai for isoquant --- modules/samtools.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/samtools.nf b/modules/samtools.nf index c1898b8..85bb337 100644 --- a/modules/samtools.nf +++ b/modules/samtools.nf @@ -20,8 +20,7 @@ process SAMTOOLS { path(sam) output: - path("${sam.SimpleName}.bam"), emit: samtools_bam - path("${sam.SimpleName}.bam.bai") + tuple path("${sam.SimpleName}.bam"), path("${sam.SimpleName}.bam.bai"), emit: samtools_bam val("process_complete"), emit: process_control script: From e07c3b75d2795f09a777cdea6eea6509151a16ec Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Tue, 5 Nov 2024 13:49:59 +0100 Subject: [PATCH 06/12] FIX: bug with help appearing at start --- main.nf | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/main.nf b/main.nf index e1a48be..d56a0fb 100644 --- a/main.nf +++ b/main.nf @@ -8,7 +8,9 @@ */ nextflow.enable.dsl=2 -if ( params.help ) { +params.help = false + +if ( params.help) { help = """ Usage: nextflow run main.nf --reads --samplesheet [options] @@ -39,18 +41,6 @@ if ( params.help ) { exit(0) } -// Display pipeline details -println """\ - T R A N S C R I P T - A N N O T A T I O N - N F P I P E L I N E - =================================== - orientation : ${params.oriented} - fastq : ${params.reads} - sam : ${params.sam} - genome : ${params.genome} - annotation : ${params.annotation} - outdir : ${params.outdir} - """ - .stripIndent() /* ======================================================================================== @@ -134,4 +124,5 @@ log.info """\ .stripIndent() /* -======================================================================================== \ No newline at end of file +======================================================================================== +*/ \ No newline at end of file From e8c822c8c13df0a934eb730f5ee95b7fbd5010f7 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Wed, 6 Nov 2024 14:59:46 +0100 Subject: [PATCH 07/12] ADD: handles both unzipped and zipped genomes --- modules/uncompress_files.nf | 20 ++------------------ subworkflows/oriented_annotation.nf | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/modules/uncompress_files.nf b/modules/uncompress_files.nf index 5b8ed80..2e9ea0c 100644 --- a/modules/uncompress_files.nf +++ b/modules/uncompress_files.nf @@ -14,27 +14,11 @@ process UNCOMPRESS_GENOME { path genome output: - path( "*" ), emit: genome_isoquant - path( "*" ), emit: genome_gffread + path( "${genome.BaseName}" ), emit: genome_isoquant + path( "${genome.BaseName}" ), emit: genome_gffread script: """ bzip2 -dc ${genome} > ${genome.BaseName} """ -} - -process UNCOMPRESS_ANNOTATION { - debug true - publishDir( "${params.outdir}/ressources", mode: 'copy' ) - - input: - path annotation - - output: - path( "*" ), emit: annotation_merge - - script: - """ - bzip2 -dk ${annotation} - """ } \ No newline at end of file diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf index 7908bb6..cd384cb 100644 --- a/subworkflows/oriented_annotation.nf +++ b/subworkflows/oriented_annotation.nf @@ -15,7 +15,7 @@ include { RNABLOOM_PAFTOOLS } include { RNABLOOM_AGAT_BED2GFF; RNABLOOM_AGAT_GFF2GTF; AGAT_COMPLEMENT; MERGE_AGAT_GFF2GTF } from '../modules/agat.nf' include { SAMPLESHEET2YAML } from '../modules/samplesheet2yaml.nf' include { SAMTOOLS } from '../modules/samtools.nf' -include { UNCOMPRESS_GENOME; UNCOMPRESS_ANNOTATION } from '../modules/uncompress_files.nf' +include { UNCOMPRESS_GENOME } from '../modules/uncompress_files.nf' workflow ORIENTED_WORKFLOW { take: @@ -29,10 +29,22 @@ workflow ORIENTED_WORKFLOW { reads main: + // Check if genome is zipped + ch_isoquant_genome = Channel.empty() + ch_gffread_genome = Channel.empty() + if (genome.endsWith('.gz')|| genome.endsWith(".bz2")){ + UNCOMPRESS_GENOME( [ [:], genome ]) + ch_isoquant_genome = UNCOMPRESS_GENOME.out.genome_isoquant + ch_gffread_genome = UNCOMPRESS_GENOME.out.genome_gffread + } else { + ch_isoquant_genome = [ [:], genome ] + ch_gffread_genome= [ [:], genome ] + } + + // Transcript annotation modules: IsoQuant SAMTOOLS(sam) SAMPLESHEET2YAML(samplesheet) - UNCOMPRESS_GENOME(genome) - ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam.collect(), UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy) + ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam.collect(), ch_isoquant_genome, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy) ISOQUANT_CONDITION(ISOQUANT.out.isoquant_gtf.flatten()) // Transcript annotation modules: RNABloom @@ -45,7 +57,7 @@ workflow ORIENTED_WORKFLOW { // Merging of transcript annotations AGAT_COMPLEMENT(ISOQUANT_CONDITION.out.isoquant_condition_gtf.join(RNABLOOM_AGAT_GFF2GTF.out.agat_gtf)) - GFFREAD(UNCOMPRESS_GENOME.out.genome_gffread, AGAT_COMPLEMENT.out.polished_gtf) + GFFREAD(ch_gffread_genome, AGAT_COMPLEMENT.out.polished_gtf) MERGE_AGAT_GFF2GTF(GFFREAD.out.gffread_gff3) MERGE_ANNOTATION(annot, MERGE_AGAT_GFF2GTF.out.merged_agat_gtf) } From 33eb3abc9d1cae1739efa10c988f3c95b7505ac8 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Wed, 13 Nov 2024 17:20:40 +0100 Subject: [PATCH 08/12] ADD: new parameter for gffread clusterisation --- modules/gffread.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/gffread.nf b/modules/gffread.nf index 3fe2399..33227d5 100644 --- a/modules/gffread.nf +++ b/modules/gffread.nf @@ -19,6 +19,7 @@ process GFFREAD { input: path genome tuple val(condition), path(polished_gtf) + val gffread_parameters output: tuple val(condition), path("${condition}.transcripts_polished_clustersMKZ.gff3"), emit: gffread_gff3 @@ -27,6 +28,6 @@ process GFFREAD { """ gffread -g ${genome} \ -o ${condition}.transcripts_polished_clustersMKZ.gff3 \ - -M -K -Z ${polished_gtf} \ + ${gffread_parameters} ${polished_gtf} """ } From 7373c05fba945290294930c8b47c41f8ba8390a6 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Wed, 13 Nov 2024 17:21:25 +0100 Subject: [PATCH 09/12] FIX: fixed issues with unzipped and zipped genomes for isoquant --- main.nf | 8 +++----- modules/uncompress_files.nf | 1 + subworkflows/oriented_annotation.nf | 23 ++++++++++++----------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/main.nf b/main.nf index d56a0fb..aa82202 100644 --- a/main.nf +++ b/main.nf @@ -57,7 +57,6 @@ include { NONORIENTED_WORKFLOW } from './subworkflows/nonoriented_annotati */ workflow{ - genome_ch = file( params.genome ) annot_ch = file( params.annotation ) config_ch = file( params.config, checkIfExists:true ) shortread_ch = params.optional_shortread != null ? file(params.optional_shortread, type: "file") : file("no_shortread", type: "file") @@ -67,8 +66,7 @@ workflow{ if (params.oriented == false) { - NONORIENTED_WORKFLOW(genome_ch, - annot_ch, + NONORIENTED_WORKFLOW(annot_ch, config_ch, shortread_ch, junc_bed_ch, @@ -77,8 +75,7 @@ workflow{ } else if (params.oriented == true) { sam_ch = Channel.fromPath( params.sam, checkIfExists:true ) - ORIENTED_WORKFLOW(genome_ch, - annot_ch, + ORIENTED_WORKFLOW(annot_ch, config_ch, shortread_ch, junc_bed_ch, @@ -119,6 +116,7 @@ log.info """\ junction bed files minimap2 : ${params.junc_bed} IsoQuant model strategy : ${params.model_strategy} RNABloom short read polishing data : ${params.optional_shortread} + gffread parameters : ${params.gffread_parameters} outdir : ${params.outdir} """ .stripIndent() diff --git a/modules/uncompress_files.nf b/modules/uncompress_files.nf index 2e9ea0c..74ea998 100644 --- a/modules/uncompress_files.nf +++ b/modules/uncompress_files.nf @@ -15,6 +15,7 @@ process UNCOMPRESS_GENOME { output: path( "${genome.BaseName}" ), emit: genome_isoquant + path( "${genome.BaseName}" ), emit: genome_minimap2 path( "${genome.BaseName}" ), emit: genome_gffread script: diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf index cd384cb..d045361 100644 --- a/subworkflows/oriented_annotation.nf +++ b/subworkflows/oriented_annotation.nf @@ -19,7 +19,6 @@ include { UNCOMPRESS_GENOME } workflow ORIENTED_WORKFLOW { take: - genome annot config shortread @@ -29,16 +28,18 @@ workflow ORIENTED_WORKFLOW { reads main: - // Check if genome is zipped - ch_isoquant_genome = Channel.empty() - ch_gffread_genome = Channel.empty() - if (genome.endsWith('.gz')|| genome.endsWith(".bz2")){ - UNCOMPRESS_GENOME( [ [:], genome ]) + // Prepare genome for different steps + ch_isoquant_genome = Channel.empty() + if (params.genome.endsWith('.gz')|| params.genome.endsWith(".bz2")){ + genome_ch = file( params.genome ) + UNCOMPRESS_GENOME(genome_ch) ch_isoquant_genome = UNCOMPRESS_GENOME.out.genome_isoquant - ch_gffread_genome = UNCOMPRESS_GENOME.out.genome_gffread + ch_minimap2_genome = UNCOMPRESS_GENOME.out.genome_minimap2 + ch_gffread_genome = UNCOMPRESS_GENOME.out.genome_gffread } else { - ch_isoquant_genome = [ [:], genome ] - ch_gffread_genome= [ [:], genome ] + ch_isoquant_genome = file( params.genome ) + ch_minimap2_genome = file( params.genome ) + ch_gffread_genome = file( params.genome ) } // Transcript annotation modules: IsoQuant @@ -50,14 +51,14 @@ workflow ORIENTED_WORKFLOW { // Transcript annotation modules: RNABloom MERGE_FASTQ_EOULSAN(samplesheet, reads.collect()) RNA_BLOOM(MERGE_FASTQ_EOULSAN.out.merged_fastq.flatten(), shortread) - RNABLOOM_MINIMAP2(genome, RNA_BLOOM.out.rnabloom_fasta) + RNABLOOM_MINIMAP2(ch_minimap2_genome, RNA_BLOOM.out.rnabloom_fasta) RNABLOOM_PAFTOOLS(RNABLOOM_MINIMAP2.out.rnabloom_sam) RNABLOOM_AGAT_BED2GFF(RNABLOOM_PAFTOOLS.out.rnabloom_bed) RNABLOOM_AGAT_GFF2GTF(RNABLOOM_AGAT_BED2GFF.out.agat_gff) // Merging of transcript annotations AGAT_COMPLEMENT(ISOQUANT_CONDITION.out.isoquant_condition_gtf.join(RNABLOOM_AGAT_GFF2GTF.out.agat_gtf)) - GFFREAD(ch_gffread_genome, AGAT_COMPLEMENT.out.polished_gtf) + GFFREAD(ch_gffread_genome, AGAT_COMPLEMENT.out.polished_gtf, params.gffread_parameters) MERGE_AGAT_GFF2GTF(GFFREAD.out.gffread_gff3) MERGE_ANNOTATION(annot, MERGE_AGAT_GFF2GTF.out.merged_agat_gtf) } From 626c994aec9433fa5eed3f868de5e3d9c648cdbd Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Thu, 14 Nov 2024 10:46:38 +0100 Subject: [PATCH 10/12] UPDATE: changed number of max cpus to use --- modules/isoquant.nf | 2 +- modules/rnabloom.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/isoquant.nf b/modules/isoquant.nf index ef4db9c..54d1f1a 100644 --- a/modules/isoquant.nf +++ b/modules/isoquant.nf @@ -11,7 +11,7 @@ process ISOQUANT { // where to store the results and in which way - cpus 24 + cpus 16 maxForks 1 publishDir( "${params.outdir}", mode: 'copy' ) diff --git a/modules/rnabloom.nf b/modules/rnabloom.nf index d7e4af7..157f138 100644 --- a/modules/rnabloom.nf +++ b/modules/rnabloom.nf @@ -10,7 +10,7 @@ process RNA_BLOOM { // where to store the results and in which way debug true - cpus 24 + cpus 16 maxForks 1 maxRetries 2 From 39477831eb3f2f2a2dc10aef5caf8f55e18cc03c Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Thu, 14 Nov 2024 10:47:41 +0100 Subject: [PATCH 11/12] ADD: added intron_length and junction_bed parameters to minimap2 rnabloom --- modules/rnabloom_minimap2.nf | 7 +++++-- subworkflows/oriented_annotation.nf | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/rnabloom_minimap2.nf b/modules/rnabloom_minimap2.nf index 58963df..ddd6d25 100644 --- a/modules/rnabloom_minimap2.nf +++ b/modules/rnabloom_minimap2.nf @@ -21,14 +21,17 @@ process RNABLOOM_MINIMAP2 { input: path genome tuple val(condition), path(bloomfasta) + val intron_length + path junc_bed output: tuple val(condition), path( "${bloomfasta.SimpleName}.sam" ), emit: rnabloom_sam script: + def junc_bed_arg = junc_bed.name != 'no_junc_bed' ? "--junc-bed $junc_bed" : "" """ - minimap2 -ax splice -uf -k14 \ - ${genome} ${bloomfasta} > ${bloomfasta.SimpleName}.sam + minimap2 -G ${intron_length} -ax splice -uf -k14 \ + ${junc_bed_arg} ${genome} ${bloomfasta} > ${bloomfasta.SimpleName}.sam """ } diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf index d045361..fba9a78 100644 --- a/subworkflows/oriented_annotation.nf +++ b/subworkflows/oriented_annotation.nf @@ -51,7 +51,7 @@ workflow ORIENTED_WORKFLOW { // Transcript annotation modules: RNABloom MERGE_FASTQ_EOULSAN(samplesheet, reads.collect()) RNA_BLOOM(MERGE_FASTQ_EOULSAN.out.merged_fastq.flatten(), shortread) - RNABLOOM_MINIMAP2(ch_minimap2_genome, RNA_BLOOM.out.rnabloom_fasta) + RNABLOOM_MINIMAP2(ch_minimap2_genome, RNA_BLOOM.out.rnabloom_fasta, params.intron_length, junc_bed) RNABLOOM_PAFTOOLS(RNABLOOM_MINIMAP2.out.rnabloom_sam) RNABLOOM_AGAT_BED2GFF(RNABLOOM_PAFTOOLS.out.rnabloom_bed) RNABLOOM_AGAT_GFF2GTF(RNABLOOM_AGAT_BED2GFF.out.agat_gff) From 0c1d7a6ac95dfc13ede35604c9e1fdf63f9ee727 Mon Sep 17 00:00:00 2001 From: Salome Brunon Date: Thu, 14 Nov 2024 16:35:24 +0100 Subject: [PATCH 12/12] FIX: add missing parameter for gffred in config --- nextflow.config | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8a37e49..b4f651f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -8,22 +8,25 @@ params { // Input options - reads = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/menelaus/*.fastq.gz" - samplesheet = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/samplesheet.csv" + reads = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.fastq" + samplesheet = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/samplesheet.csv" // References //genome = "${launchDir}/data/hdujardini_HiC" //annotation = "${launchDir}/data/hdujardini_HiC" - genome = "/import/rhodos10/ressources/sequencages/genomes/morphomen1.fa.bz2" - annotation = "/import/rhodos10/ressources/sequencages/annotations/morphomen1.gff.bz2" + genome = "/import/rhodos10/ressources/sequencages/genomes/morphoach1.fa.bz2" + annotation = "/import/rhodos10/ressources/sequencages/annotations/morphoach1.gff.bz2" // Orientation of FASTQ files oriented = true // if reads already oriented, replace with oriented = true - sam = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/*.sam" // if oriented = true, provide sam files from eoulsan + sam = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.sam" // if oriented = true, provide sam files from eoulsan // Restrander configuration file (TSO and RTP sequences) config = "${launchDir}/assets/PCB111.json" + // GFFRead input parameters + gffread_parameters = "-M" + // Minimap2 intron length intron_length = "20000" // 200k by default junc_bed = null // if no junk bed, replace with optional_shortread = null @@ -35,7 +38,7 @@ params { optional_shortread = null // if no short reads, replace with optional_shortread = null // Output directory - outdir = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/result" + outdir = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/result" } docker {