From 1b7c279ce13ffe7e9f8fb636141edb06b1679ee6 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Wed, 30 Oct 2024 14:30:39 +0100
Subject: [PATCH 01/12] PRODUCTION: added CHANGES file for versioning

---
 CHANGES.md | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 CHANGES.md

diff --git a/CHANGES.md b/CHANGES.md
new file mode 100644
index 0000000..2b13766
--- /dev/null
+++ b/CHANGES.md
@@ -0,0 +1,4 @@
+# Changelog
+
+## 0.1 (2024-10-30)
+* Initial version
\ No newline at end of file

From fe32bc03e647879970896fafe83fb14782e5cd05 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Tue, 5 Nov 2024 13:41:03 +0100
Subject: [PATCH 02/12] FIX: fixed issues with isoquant not finding bam

---
 bin/samplesheet2yaml.py     | 13 ++++++-------
 modules/isoquant.nf         |  3 +++
 modules/samplesheet2yaml.nf |  2 +-
 nextflow.config             | 24 ++++++++++++------------
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/bin/samplesheet2yaml.py b/bin/samplesheet2yaml.py
index 4320463..115bccb 100644
--- a/bin/samplesheet2yaml.py
+++ b/bin/samplesheet2yaml.py
@@ -3,7 +3,7 @@
 import sys
 
 # Function to convert CSV to the exact YAML structure
-def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None):
+def csv_to_exact_yaml(csv_file, yaml_file):
     data = {}
 
     # Reading the CSV file and grouping data by 'condition'
@@ -14,8 +14,8 @@ def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None):
             if condition not in data:
                 data[condition] = {"long read files": [], "labels": []}
 
-            # Append the full path if path_prefix is provided
-            bam_file = f"{path_prefix}/{row['fastq']}.bam" if path_prefix else row['fastq']
+            # Append .bam to the filename
+            bam_file = f"{row['fastq']}.bam"
             label = f"Sample{row['sample']}"
             
             data[condition]["long read files"].append(bam_file)
@@ -48,14 +48,13 @@ def csv_to_exact_yaml(csv_file, yaml_file, path_prefix=None):
 # Main function to handle command-line arguments
 if __name__ == "__main__":
     # Argument parsing
-    parser = argparse.ArgumentParser(description="Convert CSV to YAML and update bam file paths")
+    parser = argparse.ArgumentParser(description="Convert CSV to YAML and append .bam to file names")
     parser.add_argument('--input', required=True, help="Input CSV file")
     parser.add_argument('--output', required=True, help="Output YAML file")
-    parser.add_argument('--path', help="Optional path to prepend to 'bam' column values")
 
     args = parser.parse_args()
 
-    # Convert the CSV to the YAML structure, appending the full path to 'bam' if provided
-    csv_to_exact_yaml(args.input, args.output, args.path)
+    # Convert the CSV to the YAML structure
+    csv_to_exact_yaml(args.input, args.output)
     
     print(f"YAML file has been created: {args.output}")
\ No newline at end of file
diff --git a/modules/isoquant.nf b/modules/isoquant.nf
index 87f7ee2..ef4db9c 100644
--- a/modules/isoquant.nf
+++ b/modules/isoquant.nf
@@ -12,6 +12,8 @@ process ISOQUANT {
 
    // where to store the results and in which way
    cpus 24
+   maxForks 1
+   
    publishDir( "${params.outdir}", mode: 'copy' )
 
    // show in the log which input file is analysed
@@ -20,6 +22,7 @@ process ISOQUANT {
 
    input:
    val ready
+   path bams
    path genome 
    path samplesheet
    val model_strategy
diff --git a/modules/samplesheet2yaml.nf b/modules/samplesheet2yaml.nf
index cbf9bb6..8278f6f 100644
--- a/modules/samplesheet2yaml.nf
+++ b/modules/samplesheet2yaml.nf
@@ -25,6 +25,6 @@ process SAMPLESHEET2YAML {
    
    script:
    """
-   python3 $projectDir/bin/samplesheet2yaml.py --input ${samplesheet} --output dataset.yaml --path ${params.outdir}/bam
+   python3 $projectDir/bin/samplesheet2yaml.py --input ${samplesheet} --output dataset.yaml
    """
 }  
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 54b840f..8a37e49 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -8,34 +8,34 @@
 
 params {
  	// Input options
-    reads = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/achilles/*.fastq.gz"
-    samplesheet = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/samplesheet.csv"
+    reads               = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/menelaus/*.fastq.gz"
+    samplesheet         = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/samplesheet.csv"
 
  	// References
     //genome = "${launchDir}/data/hdujardini_HiC"
     //annotation = "${launchDir}/data/hdujardini_HiC"
-    genome = "/import/rhodos10/ressources/sequencages/genomes/morphoach1.fa.bz2"
-    annotation = "/home/brunon/shares-net/sequencages/ressources/annotations/morphoach1.gff.bz2"
+    genome              = "/import/rhodos10/ressources/sequencages/genomes/morphomen1.fa.bz2"
+    annotation          = "/import/rhodos10/ressources/sequencages/annotations/morphomen1.gff.bz2"
     
     // Orientation of FASTQ files
-    oriented = true  // if reads already oriented, replace with oriented = true
-    sam = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.sam"  // if oriented = true, provide sam files from eoulsan
+    oriented            = true  // if reads already oriented, replace with oriented = true
+    sam                 = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/*.sam"  // if oriented = true, provide sam files from eoulsan
     
     // Restrander configuration file (TSO and RTP sequences)
-    config = "${launchDir}/assets/PCB111.json"
+    config              = "${launchDir}/assets/PCB111.json"
 
     // Minimap2 intron length
-    intron_length = "20000" // 200k by default
-    junc_bed = null 	// if no junk bed, replace with optional_shortread = null
+    intron_length       = "20000" // 200k by default
+    junc_bed            = null 	// if no junk bed, replace with optional_shortread = null
 
     // IsoQuant module input parameters
-    model_strategy = "default_ont"
+    model_strategy      = "default_ont"
 
     // RNABloom input options
-    optional_shortread = null  	// if no short reads, replace with optional_shortread = null
+    optional_shortread  = null  	// if no short reads, replace with optional_shortread = null
 
  	// Output directory
-    outdir = "${launchDir}/result/achilles"
+    outdir              = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/result"
 }
 
 docker {

From e481615cbb22692062016c53f48b3bceb010ca7e Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Tue, 5 Nov 2024 13:42:09 +0100
Subject: [PATCH 03/12] FIX: add fastq.collect for merge_fastq module

---
 modules/merge_fastq.nf              | 4 ++++
 subworkflows/oriented_annotation.nf | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/modules/merge_fastq.nf b/modules/merge_fastq.nf
index 4efe9c9..77d6e7b 100644
--- a/modules/merge_fastq.nf
+++ b/modules/merge_fastq.nf
@@ -10,6 +10,8 @@ process MERGE_FASTQ_RESTRANDER {
    // where to store the results and in which way
    debug true
    publishDir( "${params.outdir}/rnabloom", mode: 'copy' )
+   
+   tag( "${reads}" )
 
    input:
    path samplesheet
@@ -29,6 +31,8 @@ process MERGE_FASTQ_EOULSAN {
    // where to store the results and in which way
    debug true
    publishDir( "${params.outdir}/rnabloom", mode: 'copy' )
+   
+   tag( "${reads}" )
 
    input:
    path samplesheet
diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf
index cb2437e..ff84c10 100644
--- a/subworkflows/oriented_annotation.nf
+++ b/subworkflows/oriented_annotation.nf
@@ -32,11 +32,11 @@ workflow ORIENTED_WORKFLOW {
       SAMTOOLS(sam)
       SAMPLESHEET2YAML(samplesheet)
       UNCOMPRESS_GENOME(genome)
-      ISOQUANT(SAMTOOLS.out.process_control.collect(), UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
+      ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam, UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
       ISOQUANT_CONDITION(ISOQUANT.out.isoquant_gtf.flatten())
 
       // Transcript annotation modules: RNABloom
-      MERGE_FASTQ_EOULSAN(samplesheet, reads)
+      MERGE_FASTQ_EOULSAN(samplesheet, reads.collect())
       RNA_BLOOM(MERGE_FASTQ_EOULSAN.out.merged_fastq.flatten(), shortread)
       RNABLOOM_MINIMAP2(genome, RNA_BLOOM.out.rnabloom_fasta)
       RNABLOOM_PAFTOOLS(RNABLOOM_MINIMAP2.out.rnabloom_sam)

From ad6a548c7ee5d083725b771a75818f6556a267a4 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Tue, 5 Nov 2024 13:45:43 +0100
Subject: [PATCH 04/12] FIX: added samtools_bam.collect to resolve missing bam

---
 subworkflows/oriented_annotation.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf
index ff84c10..7908bb6 100644
--- a/subworkflows/oriented_annotation.nf
+++ b/subworkflows/oriented_annotation.nf
@@ -32,7 +32,7 @@ workflow ORIENTED_WORKFLOW {
       SAMTOOLS(sam)
       SAMPLESHEET2YAML(samplesheet)
       UNCOMPRESS_GENOME(genome)
-      ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam, UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
+      ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam.collect(), UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
       ISOQUANT_CONDITION(ISOQUANT.out.isoquant_gtf.flatten())
 
       // Transcript annotation modules: RNABloom

From 4872929f47a74c1954fef6afcc763fdd0432a8f1 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Tue, 5 Nov 2024 13:46:40 +0100
Subject: [PATCH 05/12] FIX: added tuple .bam .bai for isoquant

---
 modules/samtools.nf | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/samtools.nf b/modules/samtools.nf
index c1898b8..85bb337 100644
--- a/modules/samtools.nf
+++ b/modules/samtools.nf
@@ -20,8 +20,7 @@ process SAMTOOLS {
    path(sam)
 
    output:
-   path("${sam.SimpleName}.bam"), emit: samtools_bam
-   path("${sam.SimpleName}.bam.bai")
+   tuple path("${sam.SimpleName}.bam"), path("${sam.SimpleName}.bam.bai"), emit: samtools_bam
    val("process_complete"), emit: process_control
       
    script:

From e07c3b75d2795f09a777cdea6eea6509151a16ec Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Tue, 5 Nov 2024 13:49:59 +0100
Subject: [PATCH 06/12] FIX: bug with help appearing at start

---
 main.nf | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/main.nf b/main.nf
index e1a48be..d56a0fb 100644
--- a/main.nf
+++ b/main.nf
@@ -8,7 +8,9 @@
 */
 
 nextflow.enable.dsl=2
-if ( params.help ) {
+params.help = false
+
+if ( params.help) {
    help = """
    Usage:
       nextflow run main.nf --reads <path> --samplesheet <path> [options]
@@ -39,18 +41,6 @@ if ( params.help ) {
    exit(0)
 }
 
-// Display pipeline details
-println """\
-      T R A N S C R I P T - A N N O T A T I O N - N F   P I P E L I N E
-      ===================================
-      orientation : ${params.oriented}
-      fastq       : ${params.reads}
-      sam         : ${params.sam}
-      genome      : ${params.genome}
-      annotation  : ${params.annotation}
-      outdir      : ${params.outdir}
-      """
-      .stripIndent()
 
 /*
 ========================================================================================
@@ -134,4 +124,5 @@ log.info """\
    .stripIndent()
 
 /*
-========================================================================================
\ No newline at end of file
+========================================================================================
+*/
\ No newline at end of file

From e8c822c8c13df0a934eb730f5ee95b7fbd5010f7 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Wed, 6 Nov 2024 14:59:46 +0100
Subject: [PATCH 07/12] ADD: handles both unzipped and zipped genomes

---
 modules/uncompress_files.nf         | 20 ++------------------
 subworkflows/oriented_annotation.nf | 20 ++++++++++++++++----
 2 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/modules/uncompress_files.nf b/modules/uncompress_files.nf
index 5b8ed80..2e9ea0c 100644
--- a/modules/uncompress_files.nf
+++ b/modules/uncompress_files.nf
@@ -14,27 +14,11 @@ process UNCOMPRESS_GENOME {
    path genome
 
    output:
-   path( "*" ), emit: genome_isoquant
-   path( "*" ), emit: genome_gffread
+   path( "${genome.BaseName}" ), emit: genome_isoquant
+   path( "${genome.BaseName}" ), emit: genome_gffread
    
    script:
    """
    bzip2 -dc ${genome} > ${genome.BaseName}
    """
-}
-
-process UNCOMPRESS_ANNOTATION {
-   debug true
-   publishDir( "${params.outdir}/ressources", mode: 'copy' )
-
-   input:
-   path annotation
-
-   output:
-   path( "*" ), emit: annotation_merge
-   
-   script:
-   """
-   bzip2 -dk ${annotation}
-   """
 }
\ No newline at end of file
diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf
index 7908bb6..cd384cb 100644
--- a/subworkflows/oriented_annotation.nf
+++ b/subworkflows/oriented_annotation.nf
@@ -15,7 +15,7 @@ include { RNABLOOM_PAFTOOLS }
 include { RNABLOOM_AGAT_BED2GFF; RNABLOOM_AGAT_GFF2GTF; AGAT_COMPLEMENT; MERGE_AGAT_GFF2GTF }   from '../modules/agat.nf'
 include { SAMPLESHEET2YAML }                                                                    from '../modules/samplesheet2yaml.nf'
 include { SAMTOOLS }                                                                            from '../modules/samtools.nf'
-include { UNCOMPRESS_GENOME; UNCOMPRESS_ANNOTATION }                                            from '../modules/uncompress_files.nf'
+include { UNCOMPRESS_GENOME }                                                                   from '../modules/uncompress_files.nf'
 
 workflow ORIENTED_WORKFLOW {
    take:
@@ -29,10 +29,22 @@ workflow ORIENTED_WORKFLOW {
       reads
       
    main:
+      // Check if genome is zipped
+      ch_isoquant_genome = Channel.empty()
+      ch_gffread_genome = Channel.empty()
+      if (genome.endsWith('.gz')|| genome.endsWith(".bz2")){
+            UNCOMPRESS_GENOME( [ [:], genome ])
+            ch_isoquant_genome = UNCOMPRESS_GENOME.out.genome_isoquant
+            ch_gffread_genome = UNCOMPRESS_GENOME.out.genome_gffread
+      } else {
+            ch_isoquant_genome = [ [:], genome ]
+            ch_gffread_genome= [ [:], genome ]
+      }
+
+      // Transcript annotation modules: IsoQuant
       SAMTOOLS(sam)
       SAMPLESHEET2YAML(samplesheet)
-      UNCOMPRESS_GENOME(genome)
-      ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam.collect(), UNCOMPRESS_GENOME.out.genome_isoquant, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
+      ISOQUANT(SAMTOOLS.out.process_control.collect(), SAMTOOLS.out.samtools_bam.collect(), ch_isoquant_genome, SAMPLESHEET2YAML.out.dataset_yaml, params.model_strategy)
       ISOQUANT_CONDITION(ISOQUANT.out.isoquant_gtf.flatten())
 
       // Transcript annotation modules: RNABloom
@@ -45,7 +57,7 @@ workflow ORIENTED_WORKFLOW {
 
       // Merging of transcript annotations
       AGAT_COMPLEMENT(ISOQUANT_CONDITION.out.isoquant_condition_gtf.join(RNABLOOM_AGAT_GFF2GTF.out.agat_gtf))
-      GFFREAD(UNCOMPRESS_GENOME.out.genome_gffread, AGAT_COMPLEMENT.out.polished_gtf)
+      GFFREAD(ch_gffread_genome, AGAT_COMPLEMENT.out.polished_gtf)
       MERGE_AGAT_GFF2GTF(GFFREAD.out.gffread_gff3)
       MERGE_ANNOTATION(annot, MERGE_AGAT_GFF2GTF.out.merged_agat_gtf)
 }

From 33eb3abc9d1cae1739efa10c988f3c95b7505ac8 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Wed, 13 Nov 2024 17:20:40 +0100
Subject: [PATCH 08/12] ADD: new parameter for gffread clusterisation

---
 modules/gffread.nf | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/gffread.nf b/modules/gffread.nf
index 3fe2399..33227d5 100644
--- a/modules/gffread.nf
+++ b/modules/gffread.nf
@@ -19,6 +19,7 @@ process GFFREAD {
     input:
     path genome
     tuple val(condition), path(polished_gtf)
+    val gffread_parameters
     
     output:
     tuple val(condition), path("${condition}.transcripts_polished_clustersMKZ.gff3"), emit: gffread_gff3
@@ -27,6 +28,6 @@ process GFFREAD {
     """
     gffread  -g ${genome} \
     -o ${condition}.transcripts_polished_clustersMKZ.gff3 \
-    -M -K -Z ${polished_gtf} \
+    ${gffread_parameters} ${polished_gtf} 
     """
 }

From 7373c05fba945290294930c8b47c41f8ba8390a6 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Wed, 13 Nov 2024 17:21:25 +0100
Subject: [PATCH 09/12] FIX: fixed issues with unzipped and zipped genomes for
 isoquant

---
 main.nf                             |  8 +++-----
 modules/uncompress_files.nf         |  1 +
 subworkflows/oriented_annotation.nf | 23 ++++++++++++-----------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/main.nf b/main.nf
index d56a0fb..aa82202 100644
--- a/main.nf
+++ b/main.nf
@@ -57,7 +57,6 @@ include { NONORIENTED_WORKFLOW       } from './subworkflows/nonoriented_annotati
 */
 
 workflow{
-   genome_ch = file( params.genome )
    annot_ch = file( params.annotation )
    config_ch = file( params.config, checkIfExists:true )
    shortread_ch = params.optional_shortread != null ? file(params.optional_shortread, type: "file") : file("no_shortread", type: "file")
@@ -67,8 +66,7 @@ workflow{
 
    if (params.oriented == false) {
       
-      NONORIENTED_WORKFLOW(genome_ch,
-                        annot_ch,
+      NONORIENTED_WORKFLOW(annot_ch,
                         config_ch,
                         shortread_ch,
                         junc_bed_ch,
@@ -77,8 +75,7 @@ workflow{
    } else if (params.oriented == true) {
       sam_ch = Channel.fromPath( params.sam, checkIfExists:true )
 
-      ORIENTED_WORKFLOW(genome_ch,
-                        annot_ch,
+      ORIENTED_WORKFLOW(annot_ch,
                         config_ch,
                         shortread_ch,
                         junc_bed_ch,
@@ -119,6 +116,7 @@ log.info """\
    junction bed files minimap2           : ${params.junc_bed}
    IsoQuant model strategy               : ${params.model_strategy}
    RNABloom short read polishing data    : ${params.optional_shortread}
+   gffread parameters                    : ${params.gffread_parameters}
    outdir                                : ${params.outdir}
    """
    .stripIndent()
diff --git a/modules/uncompress_files.nf b/modules/uncompress_files.nf
index 2e9ea0c..74ea998 100644
--- a/modules/uncompress_files.nf
+++ b/modules/uncompress_files.nf
@@ -15,6 +15,7 @@ process UNCOMPRESS_GENOME {
 
    output:
    path( "${genome.BaseName}" ), emit: genome_isoquant
+   path( "${genome.BaseName}" ), emit: genome_minimap2
    path( "${genome.BaseName}" ), emit: genome_gffread
    
    script:
diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf
index cd384cb..d045361 100644
--- a/subworkflows/oriented_annotation.nf
+++ b/subworkflows/oriented_annotation.nf
@@ -19,7 +19,6 @@ include { UNCOMPRESS_GENOME }
 
 workflow ORIENTED_WORKFLOW {
    take:
-      genome
       annot
       config
       shortread
@@ -29,16 +28,18 @@ workflow ORIENTED_WORKFLOW {
       reads
       
    main:
-      // Check if genome is zipped
-      ch_isoquant_genome = Channel.empty()
-      ch_gffread_genome = Channel.empty()
-      if (genome.endsWith('.gz')|| genome.endsWith(".bz2")){
-            UNCOMPRESS_GENOME( [ [:], genome ])
+      // Prepare genome for different steps
+      ch_isoquant_genome = Channel.empty()      
+      if (params.genome.endsWith('.gz')|| params.genome.endsWith(".bz2")){
+            genome_ch = file( params.genome )
+            UNCOMPRESS_GENOME(genome_ch)
             ch_isoquant_genome = UNCOMPRESS_GENOME.out.genome_isoquant
-            ch_gffread_genome = UNCOMPRESS_GENOME.out.genome_gffread
+            ch_minimap2_genome = UNCOMPRESS_GENOME.out.genome_minimap2
+            ch_gffread_genome  = UNCOMPRESS_GENOME.out.genome_gffread
       } else {
-            ch_isoquant_genome = [ [:], genome ]
-            ch_gffread_genome= [ [:], genome ]
+            ch_isoquant_genome = file( params.genome )
+            ch_minimap2_genome = file( params.genome )
+            ch_gffread_genome  = file( params.genome )
       }
 
       // Transcript annotation modules: IsoQuant
@@ -50,14 +51,14 @@ workflow ORIENTED_WORKFLOW {
       // Transcript annotation modules: RNABloom
       MERGE_FASTQ_EOULSAN(samplesheet, reads.collect())
       RNA_BLOOM(MERGE_FASTQ_EOULSAN.out.merged_fastq.flatten(), shortread)
-      RNABLOOM_MINIMAP2(genome, RNA_BLOOM.out.rnabloom_fasta)
+      RNABLOOM_MINIMAP2(ch_minimap2_genome, RNA_BLOOM.out.rnabloom_fasta)
       RNABLOOM_PAFTOOLS(RNABLOOM_MINIMAP2.out.rnabloom_sam)
       RNABLOOM_AGAT_BED2GFF(RNABLOOM_PAFTOOLS.out.rnabloom_bed)
       RNABLOOM_AGAT_GFF2GTF(RNABLOOM_AGAT_BED2GFF.out.agat_gff)
 
       // Merging of transcript annotations
       AGAT_COMPLEMENT(ISOQUANT_CONDITION.out.isoquant_condition_gtf.join(RNABLOOM_AGAT_GFF2GTF.out.agat_gtf))
-      GFFREAD(ch_gffread_genome, AGAT_COMPLEMENT.out.polished_gtf)
+      GFFREAD(ch_gffread_genome, AGAT_COMPLEMENT.out.polished_gtf, params.gffread_parameters)
       MERGE_AGAT_GFF2GTF(GFFREAD.out.gffread_gff3)
       MERGE_ANNOTATION(annot, MERGE_AGAT_GFF2GTF.out.merged_agat_gtf)
 }

From 626c994aec9433fa5eed3f868de5e3d9c648cdbd Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Thu, 14 Nov 2024 10:46:38 +0100
Subject: [PATCH 10/12] UPDATE: changed number of max cpus to use

---
 modules/isoquant.nf | 2 +-
 modules/rnabloom.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/isoquant.nf b/modules/isoquant.nf
index ef4db9c..54d1f1a 100644
--- a/modules/isoquant.nf
+++ b/modules/isoquant.nf
@@ -11,7 +11,7 @@
 process ISOQUANT {
 
    // where to store the results and in which way
-   cpus 24
+   cpus 16
    maxForks 1
    
    publishDir( "${params.outdir}", mode: 'copy' )
diff --git a/modules/rnabloom.nf b/modules/rnabloom.nf
index d7e4af7..157f138 100644
--- a/modules/rnabloom.nf
+++ b/modules/rnabloom.nf
@@ -10,7 +10,7 @@
 process RNA_BLOOM {
    // where to store the results and in which way
    debug true
-   cpus 24
+   cpus 16
    maxForks 1
    maxRetries 2
    

From 39477831eb3f2f2a2dc10aef5caf8f55e18cc03c Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Thu, 14 Nov 2024 10:47:41 +0100
Subject: [PATCH 11/12] ADD: added intron_length and junction_bed parameters to
 minimap2 rnabloom

---
 modules/rnabloom_minimap2.nf        | 7 +++++--
 subworkflows/oriented_annotation.nf | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/modules/rnabloom_minimap2.nf b/modules/rnabloom_minimap2.nf
index 58963df..ddd6d25 100644
--- a/modules/rnabloom_minimap2.nf
+++ b/modules/rnabloom_minimap2.nf
@@ -21,14 +21,17 @@ process RNABLOOM_MINIMAP2 {
    input:
    path genome
    tuple val(condition), path(bloomfasta)
+   val intron_length
+   path junc_bed
    
    output:
    tuple val(condition), path( "${bloomfasta.SimpleName}.sam" ), emit: rnabloom_sam
    
    script:
+   def junc_bed_arg = junc_bed.name != 'no_junc_bed' ? "--junc-bed $junc_bed" : ""
    """
-   minimap2 -ax splice -uf -k14 \
-   ${genome} ${bloomfasta} > ${bloomfasta.SimpleName}.sam
+   minimap2 -G ${intron_length} -ax splice -uf -k14 \
+   ${junc_bed_arg} ${genome} ${bloomfasta} > ${bloomfasta.SimpleName}.sam
    """
 
 }
diff --git a/subworkflows/oriented_annotation.nf b/subworkflows/oriented_annotation.nf
index d045361..fba9a78 100644
--- a/subworkflows/oriented_annotation.nf
+++ b/subworkflows/oriented_annotation.nf
@@ -51,7 +51,7 @@ workflow ORIENTED_WORKFLOW {
       // Transcript annotation modules: RNABloom
       MERGE_FASTQ_EOULSAN(samplesheet, reads.collect())
       RNA_BLOOM(MERGE_FASTQ_EOULSAN.out.merged_fastq.flatten(), shortread)
-      RNABLOOM_MINIMAP2(ch_minimap2_genome, RNA_BLOOM.out.rnabloom_fasta)
+      RNABLOOM_MINIMAP2(ch_minimap2_genome, RNA_BLOOM.out.rnabloom_fasta, params.intron_length, junc_bed)
       RNABLOOM_PAFTOOLS(RNABLOOM_MINIMAP2.out.rnabloom_sam)
       RNABLOOM_AGAT_BED2GFF(RNABLOOM_PAFTOOLS.out.rnabloom_bed)
       RNABLOOM_AGAT_GFF2GTF(RNABLOOM_AGAT_BED2GFF.out.agat_gff)

From 0c1d7a6ac95dfc13ede35604c9e1fdf63f9ee727 Mon Sep 17 00:00:00 2001
From: Salome Brunon <salome.brunon@gmail.com>
Date: Thu, 14 Nov 2024 16:35:24 +0100
Subject: [PATCH 12/12] FIX: add missing parameter for gffread in config

---
 nextflow.config | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 8a37e49..b4f651f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -8,22 +8,25 @@
 
 params {
  	// Input options
-    reads               = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/menelaus/*.fastq.gz"
-    samplesheet         = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/samplesheet.csv"
+    reads               = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.fastq"
+    samplesheet         = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/samplesheet.csv"
 
  	// References
     //genome = "${launchDir}/data/hdujardini_HiC"
     //annotation = "${launchDir}/data/hdujardini_HiC"
-    genome              = "/import/rhodos10/ressources/sequencages/genomes/morphomen1.fa.bz2"
-    annotation          = "/import/rhodos10/ressources/sequencages/annotations/morphomen1.gff.bz2"
+    genome              = "/import/rhodos10/ressources/sequencages/genomes/morphoach1.fa.bz2"
+    annotation          = "/import/rhodos10/ressources/sequencages/annotations/morphoach1.gff.bz2"
     
     // Orientation of FASTQ files
     oriented            = true  // if reads already oriented, replace with oriented = true
-    sam                 = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/*.sam"  // if oriented = true, provide sam files from eoulsan
+    sam                 = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/*.sam"  // if oriented = true, provide sam files from eoulsan
     
     // Restrander configuration file (TSO and RTP sequences)
     config              = "${launchDir}/assets/PCB111.json"
 
+    // GFFRead input parameters
+    gffread_parameters = "-M"
+
     // Minimap2 intron length
     intron_length       = "20000" // 200k by default
     junc_bed            = null 	// if no junk bed, replace with optional_shortread = null
@@ -35,7 +38,7 @@ params {
     optional_shortread  = null  	// if no short reads, replace with optional_shortread = null
 
  	// Output directory
-    outdir              = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/menelaus/result"
+    outdir              = "/import/pontos01/analyses/OUTOFTHEBLUE_C2024/egzotek/achilles/result"
 }
 
 docker {