From 320cb64f1e824d5a7672fc513e13935af3ec9075 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 28 Mar 2024 04:12:28 +0000 Subject: [PATCH 01/40] feat: flag for busco --- nextflow.config | 4 ++++ workflows/phoenix.nf | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 20e2a82f..687f8749 100755 --- a/nextflow.config +++ b/nextflow.config @@ -16,6 +16,10 @@ params { busco_db_path = null coverage = 30 // can only increase above 30 + // Run flags + run_busco = false + + // Additional input parameters for -entry SCAFFOLDS and CDC_SCAFFOLDS indir = null scaffolds_ext = '.scaffolds.fa.gz' diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 42fee2c8..0815080d 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -121,7 +121,7 @@ workflow PHOENIX_EXTERNAL { //fairy compressed file corruption check & generate read stats CORRUPTION_CHECK ( - INPUT_CHECK.out.reads, false // true says busco is being run in this workflow + INPUT_CHECK.out.reads, params.run_busco ) ch_versions = ch_versions.mix(CORRUPTION_CHECK.out.versions) From f0876a8857efee7c05eecefad91cc3ea43ef6bf2 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 28 Mar 2024 21:05:15 +0000 Subject: [PATCH 02/40] refactor: change filtering for failed samples #147 --- conf/modules.config | 2 +- modules/local/fairy_corruption_check.nf | 5 ++--- workflows/phoenix.nf | 5 +++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 1e91a084..42764e8d 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -43,7 +43,7 @@ process { [ path: { "${params.outdir}/${meta.id}/file_integrity" }, mode: 'copy', - pattern: "*{_summary.txt}" + pattern: "*{_summary_fairy.txt}" ] ] } diff --git a/modules/local/fairy_corruption_check.nf b/modules/local/fairy_corruption_check.nf index b09c675b..8e0ebb6b 100644 --- a/modules/local/fairy_corruption_check.nf +++ b/modules/local/fairy_corruption_check.nf @@ -9,8 +9,7 @@ process CORRUPTION_CHECK { val(busco_val) output: - tuple val(meta), path('*_summary.txt'), emit: outcome - tuple val(meta), path('*_summary_old.txt'), emit: outcome_to_edit + tuple val(meta), path('*_summary_fairy.txt'), emit: outcome path('*_summaryline.tsv'), optional:true, emit: summary_line tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis path("versions.yml"), emit: versions @@ -37,7 +36,7 @@ process CORRUPTION_CHECK { script_version=\$(${ica}fairy_proc.sh -V) #making a copy of the summary file to pass to READ_COUNT_CHECKS to handle file names being the same - cp ${prefix}_summary.txt ${prefix}_summary_old.txt + mv ${prefix}_summary.txt ${prefix}_summary_fairy.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 0815080d..3bfbefff 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -127,8 +127,9 @@ workflow PHOENIX_EXTERNAL { //Combining reads with output of corruption check. 
By=2 is for getting R1 and R2 results
     //The mapping here is just to get things in the right bracket so we can call var[0]
-    read_stats_ch = INPUT_CHECK.out.reads.join(CORRUPTION_CHECK.out.outcome_to_edit, by: [0,0])
-        .join(CORRUPTION_CHECK.out.outcome.splitCsv(strip:true, by:2).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0]]]}, by: [0,0])
+    read_stats_ch = INPUT_CHECK.out.reads.join(CORRUPTION_CHECK.out.outcome, by: [0,0])
+        .join(CORRUPTION_CHECK.out.outcome.splitCsv(strip:true, by:2).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0]]]}, by: [0,0])
+        .filter { it[3].findAll {!it.contains('FAILED')}}

     //Get stats on raw reads if the reads aren't corrupted
     GET_RAW_STATS (

From 4e8df4c7a44d09e6567ca44154b1890da0abc0b4 Mon Sep 17 00:00:00 2001
From: slsevilla
Date: Sun, 31 Mar 2024 05:18:35 +0000
Subject: [PATCH 03/40] refactor: ica handling #149

---
 main.nf                            | 1 +
 modules/local/samplesheet_check.nf | 9 +++------
 nextflow.config                    | 1 +
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/main.nf b/main.nf
index 50b9daef..daf9b032 100755
--- a/main.nf
+++ b/main.nf
@@ -49,6 +49,7 @@ workflow PHOENIX {
     //input on command line
     if (params.input) { ch_input = file(params.input) } else { exit 1, 'For -entry PHOENIX: Input samplesheet not specified!' }
     ch_versions = Channel.empty() // Used to collect the software versions
+    if (params.ica != true && params.ica != false) {exit 1, "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods."}

     main:
         PHOENIX_EXTERNAL ( ch_input, ch_versions, true )

diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf
index 952934ae..bc25439b 100644
--- a/modules/local/samplesheet_check.nf
+++ b/modules/local/samplesheet_check.nf
@@ -12,22 +12,19 @@ process SAMPLESHEET_CHECK {
     path("versions.yml"), emit: versions

     script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/
-    // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory.
-    if (params.ica==false) { ica = "" }
-    else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" }
-    else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." }
     // define variables
     def container_version = "base_v2.1.0"
     def container = task.container.toString() - "quay.io/jvhagey/phoenix@"
+    def script = params.ica ? "python ${params.ica_path}/check_samplesheet.py" : "check_samplesheet.py"
     """
-    ${ica}check_samplesheet.py \\
+    ${script} \\
        $samplesheet \\
        samplesheet.valid.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
-        check_samplesheet.py: \$(${ica}check_samplesheet.py --version )
+        check_samplesheet.py: \$(${script} --version )
        phoenix_base_container_tag: ${container_version}
        phoenix_base_container: ${container}
    END_VERSIONS

diff --git a/nextflow.config b/nextflow.config
index 687f8749..4c935260 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -41,6 +41,7 @@ params {
     // Terra and ICA specific options
     terra = false
     ica = false
+    ica_path = "${launchDir}/bin"

     // Database paths
     //path2db = "${baseDir}/assets/databases" // need this for kraken2db if you place those files in the assets folder

From 5ad96bafddabebb5c756d37a643d2492b3a1cb7d Mon Sep 17 00:00:00 2001
From: slsevilla
Date: Sun, 31 Mar 2024 15:29:14 +0000
Subject: [PATCH 04/40] refactor: updating new ica handling #149

---
 modules/local/fairy_corruption_check.nf | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/modules/local/fairy_corruption_check.nf b/modules/local/fairy_corruption_check.nf
index 8e0ebb6b..104c9c6b 100644
--- a/modules/local/fairy_corruption_check.nf
+++ b/modules/local/fairy_corruption_check.nf
@@ -15,10 +15,6 @@ process CORRUPTION_CHECK {
     path("versions.yml"), emit: versions

    script:
-    // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory.
-    if (params.ica==false) { ica = "" }
-    else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" }
-    else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." }
    // define variables
    def prefix = task.ext.prefix ?: "${meta.id}"
    def num1 = "${reads[0]}".minus(".fastq.gz")
    def num2 = "${reads[1]}".minus(".fastq.gz")
    def busco_parameter = busco_val ? "-b" : ""
    def container_version = "base_v2.1.0"
    def container = task.container.toString() - "quay.io/jvhagey/phoenix@"
+    def script = params.ica ? "bash ${params.ica_path}/fairy_proc.sh" : "fairy_proc.sh"
    """
    #set +e
    #check for file integrity and log errors
    #if there is a corruption problem the script will create a *_summaryline.tsv and *.synopsis file for the sample.
- ${ica}fairy_proc.sh -r ${reads[0]} -p ${prefix} ${busco_parameter} - ${ica}fairy_proc.sh -r ${reads[1]} -p ${prefix} ${busco_parameter} + ${script} -r ${reads[0]} -p ${prefix} ${busco_parameter} + ${script} -r ${reads[1]} -p ${prefix} ${busco_parameter} - script_version=\$(${ica}fairy_proc.sh -V) + script_version=\$(${script} -V) #making a copy of the summary file to pass to READ_COUNT_CHECKS to handle file names being the same mv ${prefix}_summary.txt ${prefix}_summary_fairy.txt From f131ab602ecc1d760b4a96c5c2060009fb95a303 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 15:32:17 +0000 Subject: [PATCH 05/40] refactor: handling errors, ica handling, filtering #147 #149 --- conf/modules.config | 2 +- modules/local/get_raw_stats.nf | 40 +++++++++++++++------------------- workflows/phoenix.nf | 40 +++++++++++++++++++--------------- 3 files changed, 40 insertions(+), 42 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 42764e8d..62982ad7 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -67,7 +67,7 @@ process { [ path: { "${params.outdir}/${meta.id}/file_integrity" }, mode: 'copy', - pattern: "*{_summary.txt}" + pattern: "*{_summary_rawstats.txt}" ] ] } diff --git a/modules/local/get_raw_stats.nf b/modules/local/get_raw_stats.nf index aa3add05..24f1ceef 100644 --- a/modules/local/get_raw_stats.nf +++ b/modules/local/get_raw_stats.nf @@ -11,46 +11,40 @@ process GET_RAW_STATS { output: tuple val(meta), path('*_stats.txt'), emit: raw_stats tuple val(meta), path('*_raw_read_counts.txt'), emit: combined_raw_stats - tuple val(meta), path('*_summary.txt'), emit: outcome - path('*_summaryline.tsv'), optional:true, emit: summary_line - tuple val(meta), path('*_summary_old_2.txt'), emit: outcome_to_edit - tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis + tuple val(meta), path('*_summary_rawstats.txt'), emit: outcome + path('*_summaryline.tsv'), optional:true, emit: summary_line + tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis path("versions.yml"), emit: versions - when: - //if the files are not corrupt then get the read stats - "${fairy_corrupt_outcome[0]}" == "PASSED: File ${meta.id}_R1 is not corrupt." && "${fairy_corrupt_outcome[1]}" == "PASSED: File ${meta.id}_R2 is not corrupt." - script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def busco_parameter = busco_val ? "--busco" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" - def path_to_bin = "${workflow.launchDir}/bin/" + def script_q30 = params.ica ? "python ${params.ica_path}/q30.py" : "q30.py" + def script_stats = params.ica ? "python ${params.ica_path}/create_raw_stats_output.py" : "create_raw_stats_output.py" + def script_fairy = params.ica ? 
"python ${params.ica_path}/fairy.py" : "fairy.py" """ - ${ica}q30.py -i ${reads[0]} > ${prefix}_R1_stats.txt - ${ica}q30.py -i ${reads[1]} > ${prefix}_R2_stats.txt - ${ica}create_raw_stats_output.py -n ${prefix} -r1 ${prefix}_R1_stats.txt -r2 ${prefix}_R2_stats.txt + ${script_q30} -i ${reads[0]} > ${prefix}_R1_stats.txt + ${script_q30} -i ${reads[1]} > ${prefix}_R2_stats.txt + ${script_stats} -n ${prefix} -r1 ${prefix}_R1_stats.txt -r2 ${prefix}_R2_stats.txt - ## checking that read counts match before moving on + # making a copy of the summary file - this avoids writing to the previous file + cp ${fairy_outcome} ${prefix}_input.txt # Output check for messages indicating read pairs that do not match - ${ica}fairy.py -r ${prefix}_raw_read_counts.txt -f ${fairy_outcome} ${busco_parameter} + ${script_fairy} -r ${prefix}_raw_read_counts.txt -f ${prefix}_input.txt ${busco_parameter} - #making a copy of the summary file to pass to BBMAP_REFORMAT to handle file names being the same - cp ${prefix}_summary.txt ${prefix}_summary_old_2.txt + # rename output file + mv ${prefix}_summary.txt ${prefix}_summary_rawstats.txt cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - q30.py: \$(${ica}q30.py --version ) - create_raw_stats_output.py: \$(${ica}create_raw_stats_output.py --version ) - fairy.py: \$(${ica}fairy.py --version ) + q30.py: \$(${script_q30} --version ) + create_raw_stats_output.py: \$(${script_stats} --version ) + fairy.py: \$(${script_fairy} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 3bfbefff..4869176f 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -113,7 +113,7 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - //unzip any zipped databases + // unzip any zipped databases ASSET_CHECK ( params.zipped_sketch, params.custom_mlstdb, kraken2_db_path ) @@ -127,13 +127,15 @@ workflow PHOENIX_EXTERNAL { //Combining reads with output of corruption check. 
By=2 is for getting R1 and R2 results //The mapping here is just to get things in the right bracket so we can call var[0] - read_stats_ch = INPUT_CHECK.out.reads.join(CORRUPTION_CHECK.out.outcome, by: [0,0]) - .join(CORRUPTION_CHECK.out.outcome.splitCsv(strip:true, by:2).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0]]]}, by: [0,0]) + read_stats_ch = INPUT_CHECK.out.reads + .join(CORRUPTION_CHECK.out.outcome, by: [0,0]) + .join(CORRUPTION_CHECK.out.outcome.splitCsv(strip:true, by:2) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0]]]}, by: [0,0]) .filter { it[3].findAll {!it.contains('FAILED')}} - //Get stats on raw reads if the reads aren't corrupted + // Get stats on raw reads if the reads aren't corrupted GET_RAW_STATS ( - read_stats_ch, false // false says no busco is being run + read_stats_ch, params.run_busco // false says no busco is being run ) ch_versions = ch_versions.mix(GET_RAW_STATS.out.versions) @@ -475,19 +477,21 @@ workflow PHOENIX_EXTERNAL { ch_versions = ch_versions.mix(MULTIQC.out.versions) emit: - scaffolds = BBMAP_REFORMAT.out.filtered_scaffolds - trimmed_reads = FASTP_TRIMD.out.reads - mlst = DO_MLST.out.checked_MLSTs - amrfinder_output = AMRFINDERPLUS_RUN.out.report - gamma_ar = GAMMA_AR.out.gamma - phx_summary = GATHER_SUMMARY_LINES.out.summary_report - //output for phylophoenix - griphin_tsv = GRIPHIN.out.griphin_report - griphin_excel = GRIPHIN.out.griphin_tsv_report - dir_samplesheet = GRIPHIN.out.converted_samplesheet - //output for ncbi upload - ncbi_sra_sheet = params.create_ncbi_sheet ? CREATE_NCBI_UPLOAD_SHEET.out.ncbi_sra : null - ncbi_biosample_sheet = params.create_ncbi_sheet ? CREATE_NCBI_UPLOAD_SHEET.out.ncbi_biosample : null + check = INPUT_CHECK.out.versions + // outcome = GET_RAW_STATS.out.outcome + // scaffolds = BBMAP_REFORMAT.out.filtered_scaffolds + // trimmed_reads = FASTP_TRIMD.out.reads + // mlst = DO_MLST.out.checked_MLSTs + // amrfinder_output = AMRFINDERPLUS_RUN.out.report + // gamma_ar = GAMMA_AR.out.gamma + // phx_summary = GATHER_SUMMARY_LINES.out.summary_report + // //output for phylophoenix + // griphin_tsv = GRIPHIN.out.griphin_report + // griphin_excel = GRIPHIN.out.griphin_tsv_report + // dir_samplesheet = GRIPHIN.out.converted_samplesheet + // //output for ncbi upload + // ncbi_sra_sheet = params.create_ncbi_sheet ? CREATE_NCBI_UPLOAD_SHEET.out.ncbi_sra : null + // ncbi_biosample_sheet = params.create_ncbi_sheet ? 
CREATE_NCBI_UPLOAD_SHEET.out.ncbi_biosample : null } /* From 401ec10db6bd1686258592b5b8e1824bc859a314 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 15:33:42 +0000 Subject: [PATCH 06/40] refactor: check ica param is as expected 149 --- main.nf | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index daf9b032..b78411f8 100755 --- a/main.nf +++ b/main.nf @@ -43,30 +43,29 @@ workflow PHOENIX { // Check input path parameters to see if they exist def checkPathParamList = [ params.input, params.multiqc_config, params.kraken2db] //removed , params.fasta to stop issue w/connecting to aws and igenomes not used for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - - // Check mandatory parameters + if (params.ica != true && params.ica != false) {exit 1, "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods."} //input on command line if (params.input) { ch_input = file(params.input) } else { exit 1, 'For -entry PHOENIX: Input samplesheet not specified!' } ch_versions = Channel.empty() // Used to collect the software versions - if (params.ica != true && params.ica != false) {exit 1, "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods."} - + main: PHOENIX_EXTERNAL ( ch_input, ch_versions, true ) emit: - scaffolds = PHOENIX_EXTERNAL.out.scaffolds - trimmed_reads = PHOENIX_EXTERNAL.out.trimmed_reads - mlst = PHOENIX_EXTERNAL.out.mlst - amrfinder_output = PHOENIX_EXTERNAL.out.amrfinder_output - gamma_ar = PHOENIX_EXTERNAL.out.gamma_ar - phx_summary = PHOENIX_EXTERNAL.out.phx_summary - //output for phylophoenix - griphin_tsv = PHOENIX_EXTERNAL.out.griphin_tsv - griphin_excel = PHOENIX_EXTERNAL.out.griphin_excel - dir_samplesheet = PHOENIX_EXTERNAL.out.dir_samplesheet - //output for ncbi upload - ncbi_sra_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_sra_sheet : null - ncbi_biosample_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_biosample_sheet : null + check = PHOENIX_EXTERNAL.out.check + // scaffolds = PHOENIX_EXTERNAL.out.scaffolds + // trimmed_reads = PHOENIX_EXTERNAL.out.trimmed_reads + // mlst = PHOENIX_EXTERNAL.out.mlst + // amrfinder_output = PHOENIX_EXTERNAL.out.amrfinder_output + // gamma_ar = PHOENIX_EXTERNAL.out.gamma_ar + // phx_summary = PHOENIX_EXTERNAL.out.phx_summary + // //output for phylophoenix + // griphin_tsv = PHOENIX_EXTERNAL.out.griphin_tsv + // griphin_excel = PHOENIX_EXTERNAL.out.griphin_excel + // dir_samplesheet = PHOENIX_EXTERNAL.out.dir_samplesheet + // //output for ncbi upload + // ncbi_sra_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_sra_sheet : null + // ncbi_biosample_sheet = params.create_ncbi_sheet ? 
PHOENIX_EXTERNAL.out.ncbi_biosample_sheet : null } // From 47a0c5cf91ab185d2e40b990dcd676371cb2fe4b Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 15:35:30 +0000 Subject: [PATCH 07/40] refactor: create ncbi_excel_creation flag #150 --- main.nf | 2 +- nextflow.config | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index b78411f8..a7a14d98 100755 --- a/main.nf +++ b/main.nf @@ -50,7 +50,7 @@ workflow PHOENIX { ch_versions = Channel.empty() // Used to collect the software versions main: - PHOENIX_EXTERNAL ( ch_input, ch_versions, true ) + PHOENIX_EXTERNAL ( ch_input, ch_versions, params.ncbi_excel_creation ) emit: check = PHOENIX_EXTERNAL.out.check // scaffolds = PHOENIX_EXTERNAL.out.scaffolds diff --git a/nextflow.config b/nextflow.config index 4c935260..55eb08ef 100755 --- a/nextflow.config +++ b/nextflow.config @@ -33,6 +33,7 @@ params { use_sra = false // For NCBI spreadsheet creation + ncbi_excel_creation = true microbe_example = "${baseDir}/assets/Microbe.1.0_Example_Data.xlsx" sra_metadata = "${baseDir}/assets/SRA_metadata_example.xlsx" osii_bioprojects = "${baseDir}/assets/osii-bioprojects.yaml" From 5221309ff887dca4be93c0b4c566725d0d4283a8 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 15:48:22 +0000 Subject: [PATCH 08/40] refactor: filtering strategy bbduk #147 --- modules/local/bbduk.nf | 4 ---- workflows/phoenix.nf | 5 ++++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/local/bbduk.nf b/modules/local/bbduk.nf index b409dbbb..e4b1e8f9 100755 --- a/modules/local/bbduk.nf +++ b/modules/local/bbduk.nf @@ -13,10 +13,6 @@ process BBDUK { tuple val(meta), path('*.log') , emit: log path "versions.yml" , emit: versions - when: - //if the files are not corrupt and there are equal number of reads in each file then run bbduk - "${fairy_outcome[0]}" == "PASSED: File ${meta.id}_R1 is not corrupt." && "${fairy_outcome[1]}" == "PASSED: File ${meta.id}_R2 is not corrupt." && "${fairy_outcome[2]}" == "PASSED: Read pairs for ${meta.id} are equal." 
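
The guard deleted here was the old gating mechanism: the process re-checked the fairy outcome strings itself before running. The replacement strategy, visible in the workflow-side hunk later in this patch (and in patch 02 above), moves that decision upstream into the channel, so a failed sample never reaches the process at all. A minimal standalone sketch of the pattern, with invented channel names (ch_reads, ch_fairy_outcome) and assuming outcome is the per-sample list of PASSED/FAILED strings:

    // hedged sketch of the upstream-filter pattern; not the literal pipeline code
    ch_reads
        .join(ch_fairy_outcome, by: [0,0])            // -> [meta, reads, outcome]
        .filter { meta, reads, outcome ->
            outcome.every { !it.contains('FAILED') }  // keep a sample only if every check passed
        }
        .set { ch_reads_passing }

Two differences from the workflow hunks are worth noting: findAll (as used there) returns the non-FAILED entries, so the sample is kept whenever at least one check passed, while every in this sketch demands that all of them pass; and with a [meta, reads, outcome] tuple the outcome list sits at it[2], not it[3], which is the index the later "fix meta tuple called" commit corrects.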
- script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 4869176f..33e447b2 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -140,7 +140,10 @@ workflow PHOENIX_EXTERNAL { ch_versions = ch_versions.mix(GET_RAW_STATS.out.versions) // Combining reads with output of corruption check - bbduk_ch = INPUT_CHECK.out.reads.join(GET_RAW_STATS.out.outcome.splitCsv(strip:true, by:3).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0]]]}, by: [0,0]) + bbduk_ch = INPUT_CHECK.out.reads + .join(GET_RAW_STATS.out.outcome.splitCsv(strip:true, by:3) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0]]]}, by: [0,0]) + .filter { it[3].findAll {!it.contains('FAILED')}} // Remove PhiX reads BBDUK ( From f33a04a8280b26703b4a154795e2d7502b765d74 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 16:31:44 +0000 Subject: [PATCH 09/40] chore: fix meta tuple called --- workflows/phoenix.nf | 646 +++++++++++++++++++++---------------------- 1 file changed, 323 insertions(+), 323 deletions(-) diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 33e447b2..9d7357af 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -143,7 +143,7 @@ workflow PHOENIX_EXTERNAL { bbduk_ch = INPUT_CHECK.out.reads .join(GET_RAW_STATS.out.outcome.splitCsv(strip:true, by:3) .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0]]]}, by: [0,0]) - .filter { it[3].findAll {!it.contains('FAILED')}} + .filter { it[2].findAll {!it.contains('FAILED')}} // Remove PhiX reads BBDUK ( @@ -157,330 +157,330 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(FASTP_TRIMD.out.versions) - // Rerun on unpaired reads to get stats, nothing removed - FASTP_SINGLES ( - FASTP_TRIMD.out.reads_fail - ) - ch_versions = ch_versions.mix(FASTP_SINGLES.out.versions) - - // Combining fastp json outputs based on meta.id - fastp_json_ch = FASTP_TRIMD.out.json.join(FASTP_SINGLES.out.json, by: [0,0])\ - .join(GET_RAW_STATS.out.combined_raw_stats, by: [0,0])\ - .join(GET_RAW_STATS.out.outcome_to_edit, by: [0,0]) - - // Script gathers data from fastp jsons for pipeline stats file - GET_TRIMD_STATS ( - fastp_json_ch, false // false says no busco is being run - ) - ch_versions = ch_versions.mix(GET_TRIMD_STATS.out.versions) - - // combing fastp_trimd information with fairy check of reads to confirm there are reads after filtering - trimd_reads_file_integrity_ch = FASTP_TRIMD.out.reads.join(GET_TRIMD_STATS.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0,0]) - - // Running Fastqc on trimmed reads - FASTQCTRIMD ( - trimd_reads_file_integrity_ch - ) - ch_versions = ch_versions.mix(FASTQCTRIMD.out.versions.first()) - - // Checking for Contamination in trimmed reads, creating krona plots and best hit files - KRAKEN2_TRIMD ( - FASTP_TRIMD.out.reads, GET_TRIMD_STATS.out.outcome, "trimd", GET_TRIMD_STATS.out.fastp_total_qc, [], ASSET_CHECK.out.kraken_db, "reads" - ) - ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) - - SPADES_WF ( - FASTP_SINGLES.out.reads, \ - FASTP_TRIMD.out.reads, \ - GET_TRIMD_STATS.out.fastp_total_qc, \ - GET_RAW_STATS.out.combined_raw_stats, \ - [], \ - KRAKEN2_TRIMD.out.report, \ - KRAKEN2_TRIMD.out.krona_html, \ - 
KRAKEN2_TRIMD.out.k2_bh_summary, \ - false - ) - ch_versions = ch_versions.mix(SPADES_WF.out.versions) - - // Rename scaffold headers - RENAME_FASTA_HEADERS ( - SPADES_WF.out.spades_ch - ) - ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions) - - // Removing scaffolds <500bp - BBMAP_REFORMAT ( - RENAME_FASTA_HEADERS.out.renamed_scaffolds - ) - ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions) - - // Combine bbmap log with the fairy outcome file - scaffold_check_ch = BBMAP_REFORMAT.out.log.map{meta, log -> [[id:meta.id], log]}\ - .join(GET_TRIMD_STATS.out.outcome_to_edit.map{ meta, outcome_to_edit -> [[id:meta.id], outcome_to_edit]}, by: [0])\ - .join(GET_RAW_STATS.out.combined_raw_stats.map{meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\ - .join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0])\ - .join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ - .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ - .join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0]) - - // Checking that there are still scaffolds left after filtering - SCAFFOLD_COUNT_CHECK ( - scaffold_check_ch, false, params.coverage, params.nodes, params.names - ) - ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions) - - //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file - filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]} - .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0]) - - // Running gamma to identify hypervirulence genes in scaffolds - GAMMA_HV ( - filtered_scaffolds_ch, params.hvgamdb - ) - ch_versions = ch_versions.mix(GAMMA_HV.out.versions) - - // Running gamma to identify AR genes in scaffolds - GAMMA_AR ( - filtered_scaffolds_ch, params.ardb - ) - ch_versions = ch_versions.mix(GAMMA_AR.out.versions) - - GAMMA_PF ( - filtered_scaffolds_ch, params.gamdbpf - ) - ch_versions = ch_versions.mix(GAMMA_PF.out.versions) - - // Getting Assembly Stats - QUAST ( - filtered_scaffolds_ch - ) - ch_versions = ch_versions.mix(QUAST.out.versions) - - // Creating krona plots and best hit files for weighted assembly - KRAKEN2_WTASMBLD ( - BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads" - ) - ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions) - - // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file - mash_dist_ch = filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch) - - // Running Mash distance to get top 20 matches for fastANI to speed things up - MASH_DIST ( - mash_dist_ch - ) - ch_versions = ch_versions.mix(MASH_DIST.out.versions) - - // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id - top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0]) - - // Generate file with list of paths of top taxa for fastANI - DETERMINE_TOP_MASH_HITS ( - top_mash_hits_ch - ) - ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions) - - 
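
The blocks in this section all remap tuples to [[id:meta.id], value] before joining, because join matches keys by object equality and different processes can decorate meta with different keys. A self-contained toy example of the failure mode and the remap (channel names are invented for illustration):

    // without the remap these would NOT join: [id:'a', single_end:false] != [id:'a']
    ch_a = Channel.of( [ [id:'a', single_end:false], 'a.scaffolds.fa' ] )
    ch_b = Channel.of( [ [id:'a'], 'a.taxa.txt' ] )

    ch_a.map { meta, f -> [ [id:meta.id], f ] }
        .join( ch_b.map { meta, f -> [ [id:meta.id], f ] }, by: [0] )
        .view()   // emits: [ [id:a], a.scaffolds.fa, a.taxa.txt ]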
// Combining filtered scaffolds with the top taxa list based on meta.id - top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}\ - .join(DETERMINE_TOP_MASH_HITS.out.top_taxa_list.map{ meta, top_taxa_list -> [[id:meta.id], top_taxa_list ]}, by: [0])\ - .join(DETERMINE_TOP_MASH_HITS.out.reference_dir.map{ meta, reference_dir -> [[id:meta.id], reference_dir ]}, by: [0]) - - // Getting species ID - FASTANI ( - top_taxa_list_ch - ) - ch_versions = ch_versions.mix(FASTANI.out.versions) - - // Reformat ANI headers - FORMAT_ANI ( - FASTANI.out.ani - ) - ch_versions = ch_versions.mix(FORMAT_ANI.out.versions) - - // Combining weighted kraken report with the FastANI hit based on meta.id - best_hit_ch = KRAKEN2_WTASMBLD.out.k2_bh_summary.map{meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}\ - .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit ]}, by: [0])\ - .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary ]}, by: [0]) - - // Getting ID from either FastANI or if fails, from Kraken2 - DETERMINE_TAXA_ID ( - best_hit_ch, params.nodes, params.names - ) - ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions) - - // Perform MLST steps on isolates (with srst2 on internal samples) - DO_MLST ( - BBMAP_REFORMAT.out.filtered_scaffolds, \ - SCAFFOLD_COUNT_CHECK.out.outcome, \ - FASTP_TRIMD.out.reads, \ - DETERMINE_TAXA_ID.out.taxonomy, \ - ASSET_CHECK.out.mlst_db, \ - false - ) - ch_versions = ch_versions.mix(DO_MLST.out.versions) - - // get gff and protein files for amrfinder+ - PROKKA ( - filtered_scaffolds_ch, [], [] - ) - ch_versions = ch_versions.mix(PROKKA.out.versions) - - /*// Fetch AMRFinder Database - AMRFINDERPLUS_UPDATE( ) - ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/ - - // Create file that has the organism name to pass to AMRFinder - GET_TAXA_FOR_AMRFINDER ( - DETERMINE_TAXA_ID.out.taxonomy - ) - ch_versions = ch_versions.mix(GET_TAXA_FOR_AMRFINDER.out.versions) - - // Combining taxa and scaffolds to run amrfinder and get the point mutations. 
- amr_channel = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, reads -> [[id:meta.id], reads]}\ - .join(GET_TAXA_FOR_AMRFINDER.out.amrfinder_taxa.splitCsv(strip:true).map{meta, amrfinder_taxa -> [[id:meta.id], amrfinder_taxa ]}, by: [0])\ - .join(PROKKA.out.faa.map{ meta, faa -> [[id:meta.id], faa ]}, by: [0])\ - .join(PROKKA.out.gff.map{ meta, gff -> [[id:meta.id], gff ]}, by: [0]) - - // Run AMRFinder - AMRFINDERPLUS_RUN ( - amr_channel, params.amrfinder_db - ) - ch_versions = ch_versions.mix(AMRFINDERPLUS_RUN.out.versions) - - // Combining determined taxa with the assembly stats based on meta.id - assembly_ratios_ch = DETERMINE_TAXA_ID.out.taxonomy.map{meta, taxonomy -> [[id:meta.id], taxonomy]}\ - .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) - - // Calculating the assembly ratio and gather GC% stats - CALCULATE_ASSEMBLY_RATIO ( - assembly_ratios_ch, params.ncbi_assembly_stats - ) - ch_versions = ch_versions.mix(CALCULATE_ASSEMBLY_RATIO.out.versions) - - GENERATE_PIPELINE_STATS_WF ( - GET_RAW_STATS.out.combined_raw_stats, \ - GET_TRIMD_STATS.out.fastp_total_qc, \ - [], \ - KRAKEN2_TRIMD.out.report, \ - KRAKEN2_TRIMD.out.krona_html, \ - KRAKEN2_TRIMD.out.k2_bh_summary, \ - RENAME_FASTA_HEADERS.out.renamed_scaffolds, \ - BBMAP_REFORMAT.out.filtered_scaffolds, \ - DO_MLST.out.checked_MLSTs, \ - GAMMA_HV.out.gamma, \ - GAMMA_AR.out.gamma, \ - GAMMA_PF.out.gamma, \ - QUAST.out.report_tsv, \ - [], [], [], [], \ - KRAKEN2_WTASMBLD.out.report, \ - KRAKEN2_WTASMBLD.out.krona_html, \ - KRAKEN2_WTASMBLD.out.k2_bh_summary, \ - DETERMINE_TAXA_ID.out.taxonomy, \ - FORMAT_ANI.out.ani_best_hit, \ - CALCULATE_ASSEMBLY_RATIO.out.ratio, \ - AMRFINDERPLUS_RUN.out.mutation_report, \ - CALCULATE_ASSEMBLY_RATIO.out.gc_content, \ - false - ) - ch_versions = ch_versions.mix(GENERATE_PIPELINE_STATS_WF.out.versions) - - // Combining output based on meta.id to create summary by sample -- is this verbose, ugly and annoying? yes, if anyone has a slicker way to do this we welcome the input. 
- line_summary_ch = GET_TRIMD_STATS.out.fastp_total_qc.map{meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}\ - .join(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> [[id:meta.id], checked_MLSTs]}, by: [0])\ - .join(GAMMA_HV.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ - .join(GAMMA_AR.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ - .join(GAMMA_PF.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ - .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0])\ - .join(CALCULATE_ASSEMBLY_RATIO.out.ratio.map{ meta, ratio -> [[id:meta.id], ratio]}, by: [0])\ - .join(GENERATE_PIPELINE_STATS_WF.out.pipeline_stats.map{ meta, pipeline_stats -> [[id:meta.id], pipeline_stats]}, by: [0])\ - .join(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> [[id:meta.id], taxonomy]}, by: [0])\ - .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ - .join(AMRFINDERPLUS_RUN.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ - .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit]}, by: [0]) - - // Generate summary per sample that passed SPAdes - CREATE_SUMMARY_LINE ( - line_summary_ch - ) - ch_versions = ch_versions.mix(CREATE_SUMMARY_LINE.out.versions) - - // Collect all the summary files prior to fetch step to force the fetch process to wait - failed_summaries_ch = SPADES_WF.out.line_summary.collect().ifEmpty(params.placeholder) // if no spades failure pass empty file to keep it moving... - // If you only run one sample and it fails spades there is nothing in the create line summary so pass an empty list to keep it moving... - summaries_ch = CREATE_SUMMARY_LINE.out.line_summary.collect().ifEmpty( [] ) - - // This will check the output directory for an files ending in "_summaryline_failure.tsv" and add them to the output channel - FETCH_FAILED_SUMMARIES ( - outdir_path, failed_summaries_ch, summaries_ch - ) - ch_versions = ch_versions.mix(FETCH_FAILED_SUMMARIES.out.versions) - - // combine all line summaries into one channel - spades_failure_summaries_ch = FETCH_FAILED_SUMMARIES.out.spades_failure_summary_line - fairy_summary_ch = CORRUPTION_CHECK.out.summary_line.collect().ifEmpty( [] )\ - .combine(GET_RAW_STATS.out.summary_line.collect().ifEmpty( [] ))\ - .combine(GET_TRIMD_STATS.out.summary_line.collect().ifEmpty( [] ))\ - .combine(SCAFFOLD_COUNT_CHECK.out.summary_line.collect().ifEmpty( [] ))\ - .ifEmpty( [] ) - - // pulling it all together - all_summaries_ch = spades_failure_summaries_ch.combine(failed_summaries_ch).combine(summaries_ch).combine(fairy_summary_ch) - - // Combining sample summaries into final report - GATHER_SUMMARY_LINES ( - all_summaries_ch, outdir_path, false - ) - ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions) - - //create GRiPHin report - GRIPHIN ( - all_summaries_ch, INPUT_CHECK.out.valid_samplesheet, params.ardb, outdir_path, params.coverage, true, false - ) - ch_versions = ch_versions.mix(GRIPHIN.out.versions) - - if (ncbi_excel_creation == true && params.create_ncbi_sheet == true) { - // requiring files so that this process doesn't start until needed files are made. 
- required_files_ch = FASTP_TRIMD.out.reads.map{ meta, reads -> reads[0]}.collect().combine(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> checked_MLSTs}.collect()).combine(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> taxonomy}.collect()) - - //Fill out NCBI excel sheets for upload based on what PHX found - CREATE_NCBI_UPLOAD_SHEET ( - required_files_ch, params.microbe_example, params.sra_metadata, params.osii_bioprojects, outdir_path, GRIPHIN.out.griphin_tsv_report - ) - ch_versions = ch_versions.mix(CREATE_NCBI_UPLOAD_SHEET.out.versions) - } - - // Collecting the software versions - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowPhoenix.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQCTRIMD.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FASTP_TRIMD.out.json.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FASTP_SINGLES.out.json.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(BBDUK.out.log.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.report_tsv.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_TRIMD.out.report.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_WTASMBLD.out.report.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect() - ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) + // // Rerun on unpaired reads to get stats, nothing removed + // FASTP_SINGLES ( + // FASTP_TRIMD.out.reads_fail + // ) + // ch_versions = ch_versions.mix(FASTP_SINGLES.out.versions) + + // // Combining fastp json outputs based on meta.id + // fastp_json_ch = FASTP_TRIMD.out.json.join(FASTP_SINGLES.out.json, by: [0,0])\ + // .join(GET_RAW_STATS.out.combined_raw_stats, by: [0,0])\ + // .join(GET_RAW_STATS.out.outcome_to_edit, by: [0,0]) + + // // Script gathers data from fastp jsons for pipeline stats file + // GET_TRIMD_STATS ( + // fastp_json_ch, false // false says no busco is being run + // ) + // ch_versions = ch_versions.mix(GET_TRIMD_STATS.out.versions) + + // // combing fastp_trimd information with fairy check of reads to confirm there are reads after filtering + // trimd_reads_file_integrity_ch = FASTP_TRIMD.out.reads.join(GET_TRIMD_STATS.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0,0]) + + // // Running Fastqc on trimmed reads + // FASTQCTRIMD ( + // trimd_reads_file_integrity_ch + // ) + // ch_versions = ch_versions.mix(FASTQCTRIMD.out.versions.first()) + + // // Checking for Contamination in trimmed reads, creating krona plots and best hit files + // KRAKEN2_TRIMD ( + // FASTP_TRIMD.out.reads, GET_TRIMD_STATS.out.outcome, "trimd", GET_TRIMD_STATS.out.fastp_total_qc, [], ASSET_CHECK.out.kraken_db, 
"reads" + // ) + // ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) + + // SPADES_WF ( + // FASTP_SINGLES.out.reads, \ + // FASTP_TRIMD.out.reads, \ + // GET_TRIMD_STATS.out.fastp_total_qc, \ + // GET_RAW_STATS.out.combined_raw_stats, \ + // [], \ + // KRAKEN2_TRIMD.out.report, \ + // KRAKEN2_TRIMD.out.krona_html, \ + // KRAKEN2_TRIMD.out.k2_bh_summary, \ + // false + // ) + // ch_versions = ch_versions.mix(SPADES_WF.out.versions) + + // // Rename scaffold headers + // RENAME_FASTA_HEADERS ( + // SPADES_WF.out.spades_ch + // ) + // ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions) + + // // Removing scaffolds <500bp + // BBMAP_REFORMAT ( + // RENAME_FASTA_HEADERS.out.renamed_scaffolds + // ) + // ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions) + + // // Combine bbmap log with the fairy outcome file + // scaffold_check_ch = BBMAP_REFORMAT.out.log.map{meta, log -> [[id:meta.id], log]}\ + // .join(GET_TRIMD_STATS.out.outcome_to_edit.map{ meta, outcome_to_edit -> [[id:meta.id], outcome_to_edit]}, by: [0])\ + // .join(GET_RAW_STATS.out.combined_raw_stats.map{meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\ + // .join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0])\ + // .join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ + // .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ + // .join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0]) + + // // Checking that there are still scaffolds left after filtering + // SCAFFOLD_COUNT_CHECK ( + // scaffold_check_ch, false, params.coverage, params.nodes, params.names + // ) + // ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions) + + // //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file + // filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]} + // .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0]) + + // // Running gamma to identify hypervirulence genes in scaffolds + // GAMMA_HV ( + // filtered_scaffolds_ch, params.hvgamdb + // ) + // ch_versions = ch_versions.mix(GAMMA_HV.out.versions) + + // // Running gamma to identify AR genes in scaffolds + // GAMMA_AR ( + // filtered_scaffolds_ch, params.ardb + // ) + // ch_versions = ch_versions.mix(GAMMA_AR.out.versions) + + // GAMMA_PF ( + // filtered_scaffolds_ch, params.gamdbpf + // ) + // ch_versions = ch_versions.mix(GAMMA_PF.out.versions) + + // // Getting Assembly Stats + // QUAST ( + // filtered_scaffolds_ch + // ) + // ch_versions = ch_versions.mix(QUAST.out.versions) + + // // Creating krona plots and best hit files for weighted assembly + // KRAKEN2_WTASMBLD ( + // BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads" + // ) + // ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions) + + // // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file + // mash_dist_ch = filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch) + + // // Running Mash distance to get top 20 
matches for fastANI to speed things up + // MASH_DIST ( + // mash_dist_ch + // ) + // ch_versions = ch_versions.mix(MASH_DIST.out.versions) + + // // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id + // top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0]) + + // // Generate file with list of paths of top taxa for fastANI + // DETERMINE_TOP_MASH_HITS ( + // top_mash_hits_ch + // ) + // ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions) + + // // Combining filtered scaffolds with the top taxa list based on meta.id + // top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}\ + // .join(DETERMINE_TOP_MASH_HITS.out.top_taxa_list.map{ meta, top_taxa_list -> [[id:meta.id], top_taxa_list ]}, by: [0])\ + // .join(DETERMINE_TOP_MASH_HITS.out.reference_dir.map{ meta, reference_dir -> [[id:meta.id], reference_dir ]}, by: [0]) + + // // Getting species ID + // FASTANI ( + // top_taxa_list_ch + // ) + // ch_versions = ch_versions.mix(FASTANI.out.versions) + + // // Reformat ANI headers + // FORMAT_ANI ( + // FASTANI.out.ani + // ) + // ch_versions = ch_versions.mix(FORMAT_ANI.out.versions) + + // // Combining weighted kraken report with the FastANI hit based on meta.id + // best_hit_ch = KRAKEN2_WTASMBLD.out.k2_bh_summary.map{meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}\ + // .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit ]}, by: [0])\ + // .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary ]}, by: [0]) + + // // Getting ID from either FastANI or if fails, from Kraken2 + // DETERMINE_TAXA_ID ( + // best_hit_ch, params.nodes, params.names + // ) + // ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions) + + // // Perform MLST steps on isolates (with srst2 on internal samples) + // DO_MLST ( + // BBMAP_REFORMAT.out.filtered_scaffolds, \ + // SCAFFOLD_COUNT_CHECK.out.outcome, \ + // FASTP_TRIMD.out.reads, \ + // DETERMINE_TAXA_ID.out.taxonomy, \ + // ASSET_CHECK.out.mlst_db, \ + // false + // ) + // ch_versions = ch_versions.mix(DO_MLST.out.versions) + + // // get gff and protein files for amrfinder+ + // PROKKA ( + // filtered_scaffolds_ch, [], [] + // ) + // ch_versions = ch_versions.mix(PROKKA.out.versions) + + // /*// Fetch AMRFinder Database + // AMRFINDERPLUS_UPDATE( ) + // ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/ + + // // Create file that has the organism name to pass to AMRFinder + // GET_TAXA_FOR_AMRFINDER ( + // DETERMINE_TAXA_ID.out.taxonomy + // ) + // ch_versions = ch_versions.mix(GET_TAXA_FOR_AMRFINDER.out.versions) + + // // Combining taxa and scaffolds to run amrfinder and get the point mutations. 
+ // amr_channel = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, reads -> [[id:meta.id], reads]}\ + // .join(GET_TAXA_FOR_AMRFINDER.out.amrfinder_taxa.splitCsv(strip:true).map{meta, amrfinder_taxa -> [[id:meta.id], amrfinder_taxa ]}, by: [0])\ + // .join(PROKKA.out.faa.map{ meta, faa -> [[id:meta.id], faa ]}, by: [0])\ + // .join(PROKKA.out.gff.map{ meta, gff -> [[id:meta.id], gff ]}, by: [0]) + + // // Run AMRFinder + // AMRFINDERPLUS_RUN ( + // amr_channel, params.amrfinder_db + // ) + // ch_versions = ch_versions.mix(AMRFINDERPLUS_RUN.out.versions) + + // // Combining determined taxa with the assembly stats based on meta.id + // assembly_ratios_ch = DETERMINE_TAXA_ID.out.taxonomy.map{meta, taxonomy -> [[id:meta.id], taxonomy]}\ + // .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) + + // // Calculating the assembly ratio and gather GC% stats + // CALCULATE_ASSEMBLY_RATIO ( + // assembly_ratios_ch, params.ncbi_assembly_stats + // ) + // ch_versions = ch_versions.mix(CALCULATE_ASSEMBLY_RATIO.out.versions) + + // GENERATE_PIPELINE_STATS_WF ( + // GET_RAW_STATS.out.combined_raw_stats, \ + // GET_TRIMD_STATS.out.fastp_total_qc, \ + // [], \ + // KRAKEN2_TRIMD.out.report, \ + // KRAKEN2_TRIMD.out.krona_html, \ + // KRAKEN2_TRIMD.out.k2_bh_summary, \ + // RENAME_FASTA_HEADERS.out.renamed_scaffolds, \ + // BBMAP_REFORMAT.out.filtered_scaffolds, \ + // DO_MLST.out.checked_MLSTs, \ + // GAMMA_HV.out.gamma, \ + // GAMMA_AR.out.gamma, \ + // GAMMA_PF.out.gamma, \ + // QUAST.out.report_tsv, \ + // [], [], [], [], \ + // KRAKEN2_WTASMBLD.out.report, \ + // KRAKEN2_WTASMBLD.out.krona_html, \ + // KRAKEN2_WTASMBLD.out.k2_bh_summary, \ + // DETERMINE_TAXA_ID.out.taxonomy, \ + // FORMAT_ANI.out.ani_best_hit, \ + // CALCULATE_ASSEMBLY_RATIO.out.ratio, \ + // AMRFINDERPLUS_RUN.out.mutation_report, \ + // CALCULATE_ASSEMBLY_RATIO.out.gc_content, \ + // false + // ) + // ch_versions = ch_versions.mix(GENERATE_PIPELINE_STATS_WF.out.versions) + + // // Combining output based on meta.id to create summary by sample -- is this verbose, ugly and annoying? yes, if anyone has a slicker way to do this we welcome the input. 
+ // line_summary_ch = GET_TRIMD_STATS.out.fastp_total_qc.map{meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}\ + // .join(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> [[id:meta.id], checked_MLSTs]}, by: [0])\ + // .join(GAMMA_HV.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ + // .join(GAMMA_AR.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ + // .join(GAMMA_PF.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ + // .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0])\ + // .join(CALCULATE_ASSEMBLY_RATIO.out.ratio.map{ meta, ratio -> [[id:meta.id], ratio]}, by: [0])\ + // .join(GENERATE_PIPELINE_STATS_WF.out.pipeline_stats.map{ meta, pipeline_stats -> [[id:meta.id], pipeline_stats]}, by: [0])\ + // .join(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> [[id:meta.id], taxonomy]}, by: [0])\ + // .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ + // .join(AMRFINDERPLUS_RUN.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ + // .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit]}, by: [0]) + + // // Generate summary per sample that passed SPAdes + // CREATE_SUMMARY_LINE ( + // line_summary_ch + // ) + // ch_versions = ch_versions.mix(CREATE_SUMMARY_LINE.out.versions) + + // // Collect all the summary files prior to fetch step to force the fetch process to wait + // failed_summaries_ch = SPADES_WF.out.line_summary.collect().ifEmpty(params.placeholder) // if no spades failure pass empty file to keep it moving... + // // If you only run one sample and it fails spades there is nothing in the create line summary so pass an empty list to keep it moving... + // summaries_ch = CREATE_SUMMARY_LINE.out.line_summary.collect().ifEmpty( [] ) + + // // This will check the output directory for an files ending in "_summaryline_failure.tsv" and add them to the output channel + // FETCH_FAILED_SUMMARIES ( + // outdir_path, failed_summaries_ch, summaries_ch + // ) + // ch_versions = ch_versions.mix(FETCH_FAILED_SUMMARIES.out.versions) + + // // combine all line summaries into one channel + // spades_failure_summaries_ch = FETCH_FAILED_SUMMARIES.out.spades_failure_summary_line + // fairy_summary_ch = CORRUPTION_CHECK.out.summary_line.collect().ifEmpty( [] )\ + // .combine(GET_RAW_STATS.out.summary_line.collect().ifEmpty( [] ))\ + // .combine(GET_TRIMD_STATS.out.summary_line.collect().ifEmpty( [] ))\ + // .combine(SCAFFOLD_COUNT_CHECK.out.summary_line.collect().ifEmpty( [] ))\ + // .ifEmpty( [] ) + + // // pulling it all together + // all_summaries_ch = spades_failure_summaries_ch.combine(failed_summaries_ch).combine(summaries_ch).combine(fairy_summary_ch) + + // // Combining sample summaries into final report + // GATHER_SUMMARY_LINES ( + // all_summaries_ch, outdir_path, false + // ) + // ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions) + + // //create GRiPHin report + // GRIPHIN ( + // all_summaries_ch, INPUT_CHECK.out.valid_samplesheet, params.ardb, outdir_path, params.coverage, true, false + // ) + // ch_versions = ch_versions.mix(GRIPHIN.out.versions) + + // if (ncbi_excel_creation == true && params.create_ncbi_sheet == true) { + // // requiring files so that this process doesn't start until needed files are made. 
+ // required_files_ch = FASTP_TRIMD.out.reads.map{ meta, reads -> reads[0]}.collect().combine(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> checked_MLSTs}.collect()).combine(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> taxonomy}.collect()) + + // //Fill out NCBI excel sheets for upload based on what PHX found + // CREATE_NCBI_UPLOAD_SHEET ( + // required_files_ch, params.microbe_example, params.sra_metadata, params.osii_bioprojects, outdir_path, GRIPHIN.out.griphin_tsv_report + // ) + // ch_versions = ch_versions.mix(CREATE_NCBI_UPLOAD_SHEET.out.versions) + // } + + // // Collecting the software versions + // CUSTOM_DUMPSOFTWAREVERSIONS ( + // ch_versions.unique().collectFile(name: 'collated_versions.yml') + // ) + + // // + // // MODULE: MultiQC + // // + // workflow_summary = WorkflowPhoenix.paramsSummaryMultiqc(workflow, summary_params) + // ch_workflow_summary = Channel.value(workflow_summary) + + // ch_multiqc_files = Channel.empty() + // ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) + // ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + // ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + // ch_multiqc_files = ch_multiqc_files.mix(FASTQCTRIMD.out.zip.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(FASTP_TRIMD.out.json.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(FASTP_SINGLES.out.json.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(BBDUK.out.log.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.report_tsv.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_TRIMD.out.report.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_WTASMBLD.out.report.collect{it[1]}.ifEmpty([])) + + // MULTIQC ( + // ch_multiqc_files.collect() + // ) + // multiqc_report = MULTIQC.out.report.toList() + // ch_versions = ch_versions.mix(MULTIQC.out.versions) emit: - check = INPUT_CHECK.out.versions + check = FASTP_TRIMD.out.reads // outcome = GET_RAW_STATS.out.outcome // scaffolds = BBMAP_REFORMAT.out.filtered_scaffolds // trimmed_reads = FASTP_TRIMD.out.reads From e9abaa22377d0f50afc41c8bf7db47a78d953fc5 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 16:32:21 +0000 Subject: [PATCH 10/40] refactor: move fastp variables to config #150 --- nextflow.config | 2 ++ workflows/phoenix.nf | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 55eb08ef..a6841eb1 100755 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,8 @@ params { // Params for filtering minlength = 500 phred = 33 + save_trimmed_fail = true + save_merged = false // Additional input parameters for -entry SRA and CDC_SRA input_sra = null diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 9d7357af..df8e7c93 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -153,7 +153,7 @@ workflow PHOENIX_EXTERNAL { // Trim and remove low quality reads FASTP_TRIMD ( - BBDUK.out.reads, true, false + BBDUK.out.reads, params.save_trimmed_fail, params.save_merged ) ch_versions = ch_versions.mix(FASTP_TRIMD.out.versions) From 3c87d90aac30e9420806de37ec8752892d9b7b61 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 21:06:59 +0000 Subject: [PATCH 11/40] 
refactor: ica handling, filtering #147 #149 --- modules/local/get_trimd_stats.nf | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/modules/local/get_trimd_stats.nf b/modules/local/get_trimd_stats.nf index a14d4653..6ea2a575 100644 --- a/modules/local/get_trimd_stats.nf +++ b/modules/local/get_trimd_stats.nf @@ -13,41 +13,39 @@ process GET_TRIMD_STATS { output: tuple val(meta), path('*_trimmed_read_counts.txt'), emit: fastp_total_qc - tuple val(meta), path('*_summary.txt'), emit: outcome - path('*_summaryline.tsv'), optional:true, emit: summary_line - tuple val(meta), path('*_summary_old_3.txt'), emit: outcome_to_edit - tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis + path('*_summaryline.tsv'), optional:true, emit: summary_line + tuple val(meta), path('*_summary_fastp.txt'), emit: outcome + tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def busco_parameter = busco_val ? "--busco" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script_fastp = params.ica ? "python ${params.ica_path}/FastP_QC.py" : "FastP_QC.py" + def script_fairy = params.ica ? "python ${params.ica_path}/fairy.py" : "fairy.py" """ - ${ica}FastP_QC.py \\ + ${script_fastp} \\ --trimmed_json ${fastp_trimd_json} \\ --single_json ${fastp_singles_json} \\ --name ${prefix} - # Check that there are still reads in R1 and R2 before fastqc. If there aren't reads then fastqc dies. + # making a copy of the summary file - this avoids writing to the previous file + cp ${fairy_outcome} ${prefix}_input.txt # Output check for messages indicating there are no trimmed reads after filtering. 
- ${ica}fairy.py -r ${raw_qc} -f ${fairy_outcome} -t ${prefix}_trimmed_read_counts.txt ${busco_parameter} + ${script_fairy} -r ${raw_qc} -f ${prefix}_input.txt -t ${prefix}_trimmed_read_counts.txt ${busco_parameter} #making a copy of the summary file to pass to BBMAP_REFORMAT to handle file names being the same - cp ${prefix}_summary.txt ${prefix}_summary_old_3.txt + mv ${prefix}_summary.txt ${prefix}_summary_fastp.txt cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - fairy.py: \$( ${ica}fairy.py --version ) - FastP_QC.py: \$(${ica}FastP_QC.py --version ) + fairy.py: \$( ${script_fairy} --version ) + FastP_QC.py: \$(${script_fastp} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS From fccbc89f4ae22c711960cf571300965bf890c4cb Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 21:07:45 +0000 Subject: [PATCH 12/40] refactor: ica handling #147 --- modules/local/fastp_singles.nf | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/local/fastp_singles.nf b/modules/local/fastp_singles.nf index 6328f8ed..dd4a1c60 100755 --- a/modules/local/fastp_singles.nf +++ b/modules/local/fastp_singles.nf @@ -19,14 +19,11 @@ process FASTP_SINGLES { task.ext.when == null || task.ext.when script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def container = task.container.toString() - "staphb/fastp@" + def script = params.ica ? "${params.ica_path}/create_empty_fastp_json.sh" : "create_empty_fastp_json.sh" """ echo "Debugging: Emptiness of reads[0] and reads[1]" > debug_status.log if [[ ! -s ${reads[0]} ]] && [[ ! -s ${reads[1]} ]]; then @@ -34,7 +31,7 @@ process FASTP_SINGLES { echo "!!!!! - Both are empty" # Both are empty, do nothing??? 
Nope we handle now #Create psuedo file as empty aint cutting it - ${ica}create_empty_fastp_json.sh -n ${prefix} + ${script} -n ${prefix} touch "${prefix}_empty.html" touch ${prefix}.singles.fastq gzip ${prefix}.singles.fastq @@ -69,7 +66,7 @@ process FASTP_SINGLES { 2> ${prefix}.fastp.log fi - script_version=\$(${ica}create_empty_fastp_json.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": From 25a2ca6d25021d122ad0f1a6cbe30ece69146a9c Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 21:09:16 +0000 Subject: [PATCH 13/40] refactor: ica handling, filtering #147 #149 --- workflows/phoenix.nf | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index df8e7c93..1d4d9a2c 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -157,22 +157,22 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(FASTP_TRIMD.out.versions) - // // Rerun on unpaired reads to get stats, nothing removed - // FASTP_SINGLES ( - // FASTP_TRIMD.out.reads_fail - // ) - // ch_versions = ch_versions.mix(FASTP_SINGLES.out.versions) + // Rerun on unpaired reads to get stats, nothing removed + FASTP_SINGLES ( + FASTP_TRIMD.out.reads_fail + ) + ch_versions = ch_versions.mix(FASTP_SINGLES.out.versions) - // // Combining fastp json outputs based on meta.id - // fastp_json_ch = FASTP_TRIMD.out.json.join(FASTP_SINGLES.out.json, by: [0,0])\ - // .join(GET_RAW_STATS.out.combined_raw_stats, by: [0,0])\ - // .join(GET_RAW_STATS.out.outcome_to_edit, by: [0,0]) + // Combining fastp json outputs based on meta.id + fastp_json_ch = FASTP_TRIMD.out.json.join(FASTP_SINGLES.out.json, by: [0,0])\ + .join(GET_RAW_STATS.out.combined_raw_stats, by: [0,0])\ + .join(GET_RAW_STATS.out.outcome, by: [0,0]) - // // Script gathers data from fastp jsons for pipeline stats file - // GET_TRIMD_STATS ( - // fastp_json_ch, false // false says no busco is being run - // ) - // ch_versions = ch_versions.mix(GET_TRIMD_STATS.out.versions) + // Script gathers data from fastp jsons for pipeline stats file + GET_TRIMD_STATS ( + fastp_json_ch, params.run_busco // false says no busco is being run + ) + ch_versions = ch_versions.mix(GET_TRIMD_STATS.out.versions) // // combing fastp_trimd information with fairy check of reads to confirm there are reads after filtering // trimd_reads_file_integrity_ch = FASTP_TRIMD.out.reads.join(GET_TRIMD_STATS.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0,0]) From 1a6fa53d36ed47109b5c7bae502af59b6021646e Mon Sep 17 00:00:00 2001 From: slsevilla Date: Sun, 31 Mar 2024 21:19:46 +0000 Subject: [PATCH 14/40] refactor: fastqc ica handling, filtering #147 #149 --- modules/local/fastqc.nf | 4 ---- workflows/phoenix.nf | 17 ++++++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/modules/local/fastqc.nf b/modules/local/fastqc.nf index aa7cf58c..863971ce 100755 --- a/modules/local/fastqc.nf +++ b/modules/local/fastqc.nf @@ -7,10 +7,6 @@ process FASTQC { input: tuple val(meta), path(reads), val(fairy_outcome) - when: - //if there are scaffolds left after filtering - "${fairy_outcome[3]}" == "PASSED: There are reads in ${meta.id} R1/R2 after trimming." 
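    // (illustrative sketch, not part of the original patch) the guard above moves to
    // the workflow level as a channel filter. Note that
    //   .filter { it[2].findAll { !it.contains('FAILED') } }
    // keeps a sample as long as *any* check line passed, since a non-empty list is
    // truthy in Groovy; a stricter variant that drops a sample on any failed check
    // would be:
    //   trimd_reads_file_integrity_ch.filter { meta, reads, outcome ->
    //       !outcome.any { line -> line.contains('FAILED') }
    //   }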
- output: tuple val(meta), path("*.html"), emit: html tuple val(meta), path("*.zip") , emit: zip diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 1d4d9a2c..bddc3b3b 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -174,14 +174,17 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(GET_TRIMD_STATS.out.versions) - // // combing fastp_trimd information with fairy check of reads to confirm there are reads after filtering - // trimd_reads_file_integrity_ch = FASTP_TRIMD.out.reads.join(GET_TRIMD_STATS.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0,0]) + // combing fastp_trimd information with fairy check of reads to confirm there are reads after filtering + trimd_reads_file_integrity_ch = FASTP_TRIMD.out.reads + .join(GET_TRIMD_STATS.out.outcome.splitCsv(strip:true, by:5) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0,0]) + .filter { it[2].findAll {!it.contains('FAILED')}} - // // Running Fastqc on trimmed reads - // FASTQCTRIMD ( - // trimd_reads_file_integrity_ch - // ) - // ch_versions = ch_versions.mix(FASTQCTRIMD.out.versions.first()) + // Running Fastqc on trimmed reads + FASTQCTRIMD ( + trimd_reads_file_integrity_ch + ) + ch_versions = ch_versions.mix(FASTQCTRIMD.out.versions.first()) // // Checking for Contamination in trimmed reads, creating krona plots and best hit files // KRAKEN2_TRIMD ( From 18132a9af8c6cd0a5e549bc88871458dd5d36318 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 03:42:43 +0000 Subject: [PATCH 15/40] refactor: move kraken params to config #150 --- nextflow.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nextflow.config b/nextflow.config index a6841eb1..827c48f1 100755 --- a/nextflow.config +++ b/nextflow.config @@ -29,6 +29,8 @@ params { phred = 33 save_trimmed_fail = true save_merged = false + save_output_fastqs = true + save_reads_assignment = true // Additional input parameters for -entry SRA and CDC_SRA input_sra = null From 10c7bdcb90a727bfcc08619b71c1b46ddbaef3a4 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 03:43:42 +0000 Subject: [PATCH 16/40] refactor: krakenbh handle ica and terra #149 --- modules/local/kraken_bh.nf | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/modules/local/kraken_bh.nf b/modules/local/kraken_bh.nf index 508a3531..a5f9af65 100644 --- a/modules/local/kraken_bh.nf +++ b/modules/local/kraken_bh.nf @@ -13,22 +13,16 @@ process KRAKEN_BEST_HIT { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-t terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." 
} // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "${params.ica_path}/kraken2_best_hit.sh" : "kraken2_best_hit.sh" + def terra = params.terra ? "-t terra" : "" """ - ${ica}kraken2_best_hit.sh -i $kraken_summary -q $count_file -n ${prefix} $terra + ${script} -i $kraken_summary -q $count_file -n ${prefix} $terra - script_version=\$(${ica}kraken2_best_hit.sh -V) + script_version=\$(${script} -V) mv ${prefix}.summary.txt ${prefix}.kraken2_${kraken_type}.top_kraken_hit.txt From 606e6e5b758e9d4286b7fcc9936dd24f3a4d2879 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 03:45:05 +0000 Subject: [PATCH 17/40] refactor: kraken subwf and modules for ica, reorg wf calls #149 --- modules/local/krakentools_kreport2krona.nf | 7 +- modules/local/krakentools_kreport2mpa.nf | 7 +- subworkflows/local/kraken2krona.nf | 175 ++++++--------------- workflows/phoenix.nf | 10 +- 4 files changed, 61 insertions(+), 138 deletions(-) diff --git a/modules/local/krakentools_kreport2krona.nf b/modules/local/krakentools_kreport2krona.nf index d4d3d2ae..c6bd89cf 100644 --- a/modules/local/krakentools_kreport2krona.nf +++ b/modules/local/krakentools_kreport2krona.nf @@ -13,17 +13,14 @@ process KRAKEN2_KRONA { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in phoenix/bin/ orginally from https://github.com/jenniferlu717/KrakenTools on 6/15/2022 - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def krakentools_version = "1.2" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/kreport2krona.py" : "kreport2krona.py" """ - ${ica}kreport2krona.py \\ + ${script} \\ --report ${kraken_report} \\ --output ${prefix}_${type}.krona diff --git a/modules/local/krakentools_kreport2mpa.nf b/modules/local/krakentools_kreport2mpa.nf index 01abaaa0..c14d4cb9 100644 --- a/modules/local/krakentools_kreport2mpa.nf +++ b/modules/local/krakentools_kreport2mpa.nf @@ -12,17 +12,14 @@ process KRAKENTOOLS_KREPORT2MPA { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def krakentools_version = "1.2" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/kreport2mpa.py" : "kreport2mpa.py" """ - ${ica}kreport2mpa.py \\ + ${script} \\ --report-file ${kraken_report} \\ --output ${prefix}.mpa diff --git a/subworkflows/local/kraken2krona.nf b/subworkflows/local/kraken2krona.nf index c89fac81..40e1c5c1 100755 --- a/subworkflows/local/kraken2krona.nf +++ b/subworkflows/local/kraken2krona.nf @@ -2,21 +2,12 @@ // Subworkflow: run Kraken2 // -include { KRAKEN2_KRAKEN2 as KRAKEN2_TRIMD } from '../../modules/local/kraken2' -include { KRAKEN2_KRAKEN2 as KRAKEN2_ASMBLD } from '../../modules/local/kraken2' -include { KRAKEN2_KRAKEN2 as KRAKEN2_WTASMBLD } from '../../modules/local/kraken2' -include { KRAKEN2_KRONA as KREPORT2KRONA_TRIMD } from '../../modules/local/krakentools_kreport2krona' -include { KRAKEN2_KRONA as KREPORT2KRONA_ASMBLD } from '../../modules/local/krakentools_kreport2krona' -include { KRAKEN2_KRONA as KREPORT2KRONA_WTASMBLD } from '../../modules/local/krakentools_kreport2krona' -include { KRONA_KTIMPORTTEXT as KRONA_KTIMPORTTEXT_TRIMD } from '../../modules/local/ktimporttext' -include { KRONA_KTIMPORTTEXT as KRONA_KTIMPORTTEXT_ASMBLD } from '../../modules/local/ktimporttext' -include { KRONA_KTIMPORTTEXT as KRONA_KTIMPORTTEXT_WTASMBLD } from '../../modules/local/ktimporttext' -include { KRAKENTOOLS_KREPORT2MPA as KREPORT2MPA_TRIMD } from '../../modules/local/krakentools_kreport2mpa' -include { KRAKENTOOLS_KREPORT2MPA as KREPORT2MPA_ASMBLD } from '../../modules/local/krakentools_kreport2mpa' -include { KRAKENTOOLS_MAKEKREPORT } from '../../modules/local/krakentools_makekreport' -include { KRAKEN_BEST_HIT as KRAKEN2_BH_TRIMD } from '../../modules/local/kraken_bh' -include { KRAKEN_BEST_HIT as KRAKEN2_BH_ASMBLD } from '../../modules/local/kraken_bh' -include { KRAKEN_BEST_HIT as KRAKEN2_BH_WTASMBLD } from '../../modules/local/kraken_bh' +include { KRAKEN2_KRAKEN2 } from '../../modules/local/kraken2' +include { KRAKEN2_KRONA } from '../../modules/local/krakentools_kreport2krona' +include { KRONA_KTIMPORTTEXT } from '../../modules/local/ktimporttext' +include { KRAKENTOOLS_KREPORT2MPA } from '../../modules/local/krakentools_kreport2mpa' +include { KRAKENTOOLS_MAKEKREPORT } from '../../modules/local/krakentools_makekreport' +include { KRAKEN_BEST_HIT } from '../../modules/local/kraken_bh' workflow KRAKEN2_WF { take: @@ -46,95 +37,27 @@ workflow KRAKEN2_WF { } } - if(type =="trimd") { + // Checking for Contamination in trimmed reads + KRAKEN2_KRAKEN2 ( + fasta_ch, type, params.save_output_fastqs, params.save_reads_assignment + ) + ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions) - // Checking for Contamination in trimmed reads - KRAKEN2_TRIMD ( - fasta_ch, "trimd", true, true - ) - ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) - - // Create mpa file - KREPORT2MPA_TRIMD ( - KRAKEN2_TRIMD.out.report - ) - ch_versions = ch_versions.mix(KREPORT2MPA_TRIMD.out.versions) - - // Converting kraken report to krona file to have hierarchical output in krona plot - KREPORT2KRONA_TRIMD ( - KRAKEN2_TRIMD.out.report, "trimd" - ) - ch_versions = ch_versions.mix(KREPORT2KRONA_TRIMD.out.versions) - - // Create krona plot from kraken report - KRONA_KTIMPORTTEXT_TRIMD ( - KREPORT2KRONA_TRIMD.out.krona, "trimd" - ) - ch_versions = ch_versions.mix(KRONA_KTIMPORTTEXT_TRIMD.out.versions) - - // Combining kraken report with quast report based on meta.id - kraken_bh_trimd_ch = KRAKEN2_TRIMD.out.report.map{meta, report -> [[id:meta.id], report]}\ - .join(qc_stats.map{ meta, fastp_total_qc -> [[id:meta.id], 
fastp_total_qc]}, by: [0]) - - // Getting Kraken best hit for assembled data - KRAKEN2_BH_TRIMD ( - kraken_bh_trimd_ch, "trimd" - ) - ch_versions = ch_versions.mix(KRAKEN2_BH_TRIMD.out.versions) - - report = KRAKEN2_TRIMD.out.report - k2_bh_summary = KRAKEN2_BH_TRIMD.out.ksummary - krona_html = KRONA_KTIMPORTTEXT_TRIMD.out.html - - } else if(type =="asmbld") { - - // Checking for Contamination in scaffolds - KRAKEN2_ASMBLD ( - fasta_ch, "asmbld", true, true - ) - ch_versions = ch_versions.mix(KRAKEN2_ASMBLD.out.versions) - - // Create mpa file - KREPORT2MPA_ASMBLD ( - KRAKEN2_ASMBLD.out.report - ) - ch_versions = ch_versions.mix(KREPORT2MPA_ASMBLD.out.versions) + // Create mpa file + KRAKENTOOLS_KREPORT2MPA ( + KRAKEN2_KRAKEN2.out.report + ) + ch_versions = ch_versions.mix(KRAKENTOOLS_KREPORT2MPA.out.versions) + if (type == "trimd" || type == "asmbld"){ // Converting kraken report to krona file to have hierarchical output in krona plot - KREPORT2KRONA_ASMBLD ( - KRAKEN2_ASMBLD.out.report, "asmbld" - ) - ch_versions = ch_versions.mix(KREPORT2KRONA_ASMBLD.out.versions) - - // Create krona plot from kraken report - KRONA_KTIMPORTTEXT_ASMBLD ( - KREPORT2KRONA_ASMBLD.out.krona, "asmbld" + KRAKEN2_KRONA ( + KRAKEN2_KRAKEN2.out.report, type ) - ch_versions = ch_versions.mix(KRONA_KTIMPORTTEXT_ASMBLD.out.versions) - - kraken_bh_asmbld_ch = KRAKEN2_ASMBLD.out.report.map{meta, report -> [[id:meta.id], report]}\ - .join(quast.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) - - // Getting Kraken best hit for assembled data - KRAKEN2_BH_ASMBLD ( - kraken_bh_asmbld_ch, "asmbld" - ) - ch_versions = ch_versions.mix(KRAKEN2_BH_ASMBLD.out.versions) - - report = KRAKEN2_ASMBLD.out.report - k2_bh_summary = KRAKEN2_BH_ASMBLD.out.ksummary - krona_html = KRONA_KTIMPORTTEXT_ASMBLD.out.html - - } else if(type=="wtasmbld") { - - // Getting species ID as back up for FastANI and checking contamination isn't in assembly - KRAKEN2_WTASMBLD ( - fasta_ch, "wtasmbld", true, true - ) - ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions) - + report = KRAKEN2_KRAKEN2.out.report + } else if (type == "wtasmbld"){ // Add in krakendb into the kraken reads channel so each fasta has a krakendb to go with it. 
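    // (illustrative, not part of the original patch) combine() with no `by` emits the
    // cartesian product, so a one-element krakendb channel is appended to every
    // classified-reads tuple, e.g.:
    //   Channel.of(['s1', 'reads1.txt'], ['s2', 'reads2.txt'])
    //          .combine(Channel.of('/path/to/k2db'))
    //   // -> ['s1', 'reads1.txt', '/path/to/k2db'], ['s2', 'reads2.txt', '/path/to/k2db']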
- make_report_ch = KRAKEN2_WTASMBLD.out.classified_reads_assignment.combine(kraken2_db_path) + make_report_ch = KRAKEN2_KRAKEN2.out.classified_reads_assignment.combine(kraken2_db_path) // Create weighted kraken report based on scaffold length KRAKENTOOLS_MAKEKREPORT ( @@ -143,38 +66,44 @@ workflow KRAKEN2_WF { ch_versions = ch_versions.mix(KRAKENTOOLS_MAKEKREPORT.out.versions) // Converting kraken report to krona file to have hierarchical output in krona plot - KREPORT2KRONA_WTASMBLD ( + KRAKEN2_KRONA ( KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report, "wtasmbld" ) - ch_versions = ch_versions.mix(KREPORT2KRONA_WTASMBLD.out.versions) + + report = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report + } + ch_versions = ch_versions.mix(KRAKEN2_KRONA.out.versions) + + + // Create krona plot from kraken report + KRONA_KTIMPORTTEXT ( + KRAKEN2_KRONA.out.krona, type + ) + ch_versions = ch_versions.mix(KRONA_KTIMPORTTEXT.out.versions) + if (type == "trimd"){ // Combining kraken report with quast report based on meta.id - kraken_bh_wtasmbld_ch = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report.map{meta, kraken_weighted_report -> [[id:meta.id], kraken_weighted_report]}\ + kraken_bh_ch = KRAKEN2_KRAKEN2.out.report.map{meta, report -> [[id:meta.id], report]}\ + .join(qc_stats.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0]) + } else if (type == "asmbld"){ + // Combining kraken report with quast report based on meta.id + kraken_bh_ch = KRAKEN2_KRAKEN2.out.report.map{meta, report -> [[id:meta.id], report]}\ + .join(quast.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) + } else if (type == "wtasmbld"){ + // Combining kraken report with quast report based on meta.id + kraken_bh_ch = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report.map{meta, kraken_weighted_report -> [[id:meta.id], kraken_weighted_report]}\ .join(quast.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) - - // Getting Kraken best hit for assembled data - KRAKEN2_BH_WTASMBLD ( - kraken_bh_wtasmbld_ch, "wtasmbld" - ) - ch_versions = ch_versions.mix(KRAKEN2_BH_WTASMBLD.out.versions) - - KRONA_KTIMPORTTEXT_WTASMBLD ( - KREPORT2KRONA_WTASMBLD.out.krona, "wtasmbld" - ) - ch_versions = ch_versions.mix(KRONA_KTIMPORTTEXT_WTASMBLD.out.versions) - - report = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report - k2_bh_summary = KRAKEN2_BH_WTASMBLD.out.ksummary - krona_html = KRONA_KTIMPORTTEXT_WTASMBLD.out.html - - } else { - println("Type options are: wtasmbld, asmbld or trimd") } + + // Getting Kraken best hit for assembled data + KRAKEN_BEST_HIT ( + kraken_bh_ch, type + ) + ch_versions = ch_versions.mix(KRAKEN_BEST_HIT.out.versions) emit: report = report - k2_bh_summary = k2_bh_summary - krona_html = krona_html + k2_bh_summary = KRAKEN_BEST_HIT.out.ksummary + krona_html = KRONA_KTIMPORTTEXT.out.html versions = ch_versions // channel: [ versions.yml ] - } \ No newline at end of file diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index bddc3b3b..370fbf2b 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -186,11 +186,11 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(FASTQCTRIMD.out.versions.first()) - // // Checking for Contamination in trimmed reads, creating krona plots and best hit files - // KRAKEN2_TRIMD ( - // FASTP_TRIMD.out.reads, GET_TRIMD_STATS.out.outcome, "trimd", GET_TRIMD_STATS.out.fastp_total_qc, [], ASSET_CHECK.out.kraken_db, "reads" - // ) - // ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) + // Checking for Contamination in trimmed 
reads, creating krona plots and best hit files + KRAKEN2_TRIMD ( + FASTP_TRIMD.out.reads, GET_TRIMD_STATS.out.outcome, "trimd", GET_TRIMD_STATS.out.fastp_total_qc, [], ASSET_CHECK.out.kraken_db, "reads" + ) + ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) // SPADES_WF ( // FASTP_SINGLES.out.reads, \ From 5896dcd9b30d34e44a5e8f93133dfe6f7de27a86 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 03:46:17 +0000 Subject: [PATCH 18/40] refactor: add check for terra parms #151 --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index a7a14d98..a34f5b49 100755 --- a/main.nf +++ b/main.nf @@ -44,6 +44,7 @@ workflow PHOENIX { def checkPathParamList = [ params.input, params.multiqc_config, params.kraken2db] //removed , params.fasta to stop issue w/connecting to aws and igenomes not used for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } if (params.ica != true && params.ica != false) {exit 1, "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods."} + if (params.terra != true && params.terra != false) {exit 1, "Please set params.terra to either \"true\" if running on terra or \"false\" for all other methods."} //input on command line if (params.input) { ch_input = file(params.input) } else { exit 1, 'For -entry PHOENIX: Input samplesheet not specified!' } From c75a6a916f730d5837e74ee8f69530b212164e56 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 20:34:14 +0000 Subject: [PATCH 19/40] refactor: add param for extended_qc #151 --- main.nf | 3 +++ workflows/phoenix.nf | 24 ++++++++++++------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index a34f5b49..4635262c 100755 --- a/main.nf +++ b/main.nf @@ -84,6 +84,9 @@ workflow CDC_PHOENIX { if (params.input) { ch_input = file(params.input) } else { exit 1, 'For -entry CDC_PHOENIX: Input samplesheet not specified!' 
} ch_versions = Channel.empty() // Used to collect the software versions + // true is for -entry CDC_PHOENIX and CDC_SCAFFOLDS - used in SPADES + extended_qc=false + main: PHOENIX_EXQC ( ch_input, ch_versions, true ) diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 370fbf2b..5ec00a70 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -192,18 +192,18 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) - // SPADES_WF ( - // FASTP_SINGLES.out.reads, \ - // FASTP_TRIMD.out.reads, \ - // GET_TRIMD_STATS.out.fastp_total_qc, \ - // GET_RAW_STATS.out.combined_raw_stats, \ - // [], \ - // KRAKEN2_TRIMD.out.report, \ - // KRAKEN2_TRIMD.out.krona_html, \ - // KRAKEN2_TRIMD.out.k2_bh_summary, \ - // false - // ) - // ch_versions = ch_versions.mix(SPADES_WF.out.versions) + SPADES_WF ( + FASTP_SINGLES.out.reads, \ + FASTP_TRIMD.out.reads, \ + GET_TRIMD_STATS.out.fastp_total_qc, \ + GET_RAW_STATS.out.combined_raw_stats, \ + [], \ + KRAKEN2_TRIMD.out.report, \ + KRAKEN2_TRIMD.out.krona_html, \ + KRAKEN2_TRIMD.out.k2_bh_summary, \ + params.extended_qc + ) + ch_versions = ch_versions.mix(SPADES_WF.out.versions) // // Rename scaffold headers // RENAME_FASTA_HEADERS ( From e2c3145b4dcdf6ee0deae1971b45bf4108fe7152 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 20:35:22 +0000 Subject: [PATCH 20/40] refactor: spades for ica #149 --- modules/local/spades.nf | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/modules/local/spades.nf b/modules/local/spades.nf index e919591d..0f49c6e8 100755 --- a/modules/local/spades.nf +++ b/modules/local/spades.nf @@ -24,10 +24,6 @@ process SPADES { tuple val(meta), path("*_spades_outcome.csv") , emit: spades_outcome script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -37,14 +33,17 @@ process SPADES { def phred_offset = params.phred def extended_qc_arg = extended_qc ? "-c" : "" def container = task.container.toString() - "staphb/spades@" + def script_stats = params.ica ? "${params.ica_path}/pipeline_stats_writer_trimd.sh" : "pipeline_stats_writer_trimd.sh" + def script_before = params.ica ? "${params.ica_path}/beforeSpades.sh" : "beforeSpades.sh" + def script_after = params.ica ? "${params.ica_path}/afterSpades.sh" : "afterSpades.sh" """ # preemptively create _summary_line.csv and .synopsis file in case spades fails (no contigs or scaffolds created) we can still collect upstream stats. 
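    # (descriptive aside, not part of the original patch) these pre-written stats
    # files are only kept when SPAdes fails; afterSpades.sh records the outcome in
    # *_spades_outcome.csv (run_failure / no_scaffolds / no_contigs), the same
    # values the SPADES_WF failure branch filters on before running the
    # GENERATE_PIPELINE_STATS_FAILURE and CREATE_SUMMARY_LINE_FAILURE modules.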
- ${ica}pipeline_stats_writer_trimd.sh -a ${fastp_raw_qc} -b ${fastp_total_qc} -c ${reads[0]} -d ${reads[1]} -e ${kraken2_trimd_report} -f ${k2_bh_summary} -g ${krona_trimd} - ${ica}beforeSpades.sh -k ${k2_bh_summary} -n ${prefix} -d ${full_outdir} ${extended_qc_arg} + ${script_stats} -a ${fastp_raw_qc} -b ${fastp_total_qc} -c ${reads[0]} -d ${reads[1]} -e ${kraken2_trimd_report} -f ${k2_bh_summary} -g ${krona_trimd} + ${script_before} -k ${k2_bh_summary} -n ${prefix} -d ${full_outdir} ${extended_qc_arg} #get version information - bspades_version=\$(${ica}beforeSpades.sh -V) - pipestats_version=\$(${ica}pipeline_stats_writer_trimd.sh -V) - aspades_version=\$(${ica}afterSpades.sh -V) + bspades_version=\$(${script_before} -V) + pipestats_version=\$(${script_stats} -V) + aspades_version=\$(${script_after} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -93,6 +92,6 @@ process SPADES { #Create a summaryline file that will be deleted later if spades is successful if not this line shows up in the final Phoenix_output_summary file #create file '*_spades_outcome.csv' to state if spades fails, if contigs or scaffolds are created. See spades_failure.nf subworkflow #This file will determine if downstream process GENERATE_PIPELINE_STATS_FAILURE and CREATE_SUMMARY_LINE_FAILURE will run (if spades creates contigs, but not scaffolds). - ${ica}afterSpades.sh + ${script_after} """ } From 05343daa1a555cec8e8878959b9fb00c7a38384b Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 20:36:28 +0000 Subject: [PATCH 21/40] refactor: spades wf modules for ica #149 --- modules/local/determine_taxa_id_failure.nf | 9 +++------ modules/local/generate_pipeline_stats_failure.nf | 14 ++++---------- .../local/generate_pipeline_stats_failure_exqc.nf | 14 ++++---------- modules/local/phoenix_summary_line_failure.nf | 9 +++------ 4 files changed, 14 insertions(+), 32 deletions(-) diff --git a/modules/local/determine_taxa_id_failure.nf b/modules/local/determine_taxa_id_failure.nf index e7aa7b82..6a11e57b 100644 --- a/modules/local/determine_taxa_id_failure.nf +++ b/modules/local/determine_taxa_id_failure.nf @@ -17,18 +17,15 @@ process DETERMINE_TAXA_ID_FAILURE { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"bash ${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh" """ - ${ica}determine_taxID.sh -r $k2_bh_summary -s $meta.id -d $nodes_file -m $names_file + ${script} -r $k2_bh_summary -s $meta.id -d $nodes_file -m $names_file - script_version=\$(${ica}determine_taxID.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/generate_pipeline_stats_failure.nf b/modules/local/generate_pipeline_stats_failure.nf index af4181fd..a0fa9e79 100644 --- a/modules/local/generate_pipeline_stats_failure.nf +++ b/modules/local/generate_pipeline_stats_failure.nf @@ -22,20 +22,14 @@ process GENERATE_PIPELINE_STATS_FAILURE { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-2 terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "bash ${params.ica_path}/pipeline_stats_writer.sh" : "pipeline_stats_writer.sh" + def terra = params.terra ? "-2 terra" : "" """ - ${ica}pipeline_stats_writer.sh \\ + ${script} \\ -a $raw_qc \\ -b $fastp_total_qc \\ -d ${prefix} \\ @@ -46,7 +40,7 @@ process GENERATE_PIPELINE_STATS_FAILURE { -5 $coverage \\ $terra - script_version=\$(${ica}pipeline_stats_writer.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/generate_pipeline_stats_failure_exqc.nf b/modules/local/generate_pipeline_stats_failure_exqc.nf index 6006d332..9cf7fabc 100644 --- a/modules/local/generate_pipeline_stats_failure_exqc.nf +++ b/modules/local/generate_pipeline_stats_failure_exqc.nf @@ -23,21 +23,15 @@ process GENERATE_PIPELINE_STATS_FAILURE_EXQC { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-2 terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"bash ${params.ica_path}/pipeline_stats_writer.sh" : "pipeline_stats_writer.sh" + def terra = params.terra ? "-2 terra" : "" """ # this runs with -entry CDC_PHEONIX when SPAdes fails (creates contigs and not scaffolds) - ${ica}pipeline_stats_writer.sh \\ + ${script} \\ -a $raw_qc \\ -b $fastp_total_qc \\ -d ${prefix} \\ @@ -49,7 +43,7 @@ process GENERATE_PIPELINE_STATS_FAILURE_EXQC { -5 $coverage \\ $terra - script_version=\$(${ica}pipeline_stats_writer.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/phoenix_summary_line_failure.nf b/modules/local/phoenix_summary_line_failure.nf index 3f63c7dd..d46eabd7 100644 --- a/modules/local/phoenix_summary_line_failure.nf +++ b/modules/local/phoenix_summary_line_failure.nf @@ -20,17 +20,14 @@ process CREATE_SUMMARY_LINE_FAILURE { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def extended_qc_arg = extended_qc ? "--extended_qc" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.py" """ - ${ica}Phoenix_summary_line.py \\ + ${script} \\ -n ${prefix} \\ -k $trimd_ksummary \\ -t $fastp_total_qc \\ @@ -42,7 +39,7 @@ process CREATE_SUMMARY_LINE_FAILURE { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - Phoenix_summary_line.py: \$(${ica}Phoenix_summary_line.py --version ) + Phoenix_summary_line.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS From 66e8f4640c135f7c8d86a443013bcc5b73cc7f15 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 20:40:10 +0000 Subject: [PATCH 22/40] refactor: rename_headers for ica #149 --- modules/local/rename_fasta_headers.nf | 9 +++------ workflows/phoenix.nf | 10 +++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/modules/local/rename_fasta_headers.nf b/modules/local/rename_fasta_headers.nf index c59cac46..9239daaf 100644 --- a/modules/local/rename_fasta_headers.nf +++ b/modules/local/rename_fasta_headers.nf @@ -12,26 +12,23 @@ process RENAME_FASTA_HEADERS { path "versions.yml" , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." 
} // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/rename_fasta_headers.py" : "rename_fasta_headers.py" """ gunzip --force ${assembled_scaffolds} unzipped=\$(basename ${assembled_scaffolds} .gz) #adding this in to allow alternative file names with -entry SCAFFOLDS --scaffolds_ext - ${ica}rename_fasta_headers.py --input \$unzipped --output ${prefix}.renamed.scaffolds.fa --name ${prefix} + ${script} --input \$unzipped --output ${prefix}.renamed.scaffolds.fa --name ${prefix} gzip --force ${prefix}.renamed.scaffolds.fa cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - rename_fasta_headers.py: \$(${ica}rename_fasta_headers.py --version ) + rename_fasta_headers.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 5ec00a70..a05229d0 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -205,11 +205,11 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(SPADES_WF.out.versions) - // // Rename scaffold headers - // RENAME_FASTA_HEADERS ( - // SPADES_WF.out.spades_ch - // ) - // ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions) + // Rename scaffold headers + RENAME_FASTA_HEADERS ( + SPADES_WF.out.spades_ch + ) + ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions) // // Removing scaffolds <500bp // BBMAP_REFORMAT ( From f72f42a92de20d708e5d7e540ef2f7075b5bf7c3 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 1 Apr 2024 21:53:34 +0000 Subject: [PATCH 23/40] refactor: ica, output file handling #149 --- modules/local/fairy_scaffold_count_check.nf | 109 +++++++++++--------- workflows/phoenix.nf | 40 +++---- 2 files changed, 80 insertions(+), 69 deletions(-) diff --git a/modules/local/fairy_scaffold_count_check.nf b/modules/local/fairy_scaffold_count_check.nf index b7f914d2..30a67c45 100644 --- a/modules/local/fairy_scaffold_count_check.nf +++ b/modules/local/fairy_scaffold_count_check.nf @@ -17,25 +17,12 @@ process SCAFFOLD_COUNT_CHECK { path(names_file) output: - tuple val(meta), path('*_summary.txt'), emit: outcome - path('*_summaryline.tsv'), optional:true, emit: summary_line - tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis - path("versions.yml"), emit: versions + tuple val(meta), path('*_summary_complete.txt'), emit: outcome + path('*_summaryline.tsv'), optional:true, emit: summary_line + tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis + path("versions.yml"), emit: versions script: - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-2 terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { - ica_python = "" - ica_bash = "" - } else if (params.ica==true) { - ica_python = "python ${workflow.launchDir}/bin/" - ica_bash = "bash ${workflow.launchDir}/bin/" - } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." 
} // define variables def prefix = task.ext.prefix ?: "${meta.id}" def fairy_read_count_outcome_file = fairy_read_count_outcome ? "$fairy_read_count_outcome" : "" @@ -49,58 +36,82 @@ process SCAFFOLD_COUNT_CHECK { def extended_qc_arg = extended_qc ? "--extended_qc" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script_id = params.ica ? "${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh" + def script_writer = params.ica ? "${params.ica_path}/pipeline_stats_writer.sh" : "pipeline_stats_writer.sh" + def script_summary = params.ica ? "python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.py" + def script_edit = params.ica ? "python ${params.ica_path}/edit_line_summary.py" : "edit_line_summary.py" + def terra = params.terra ? "-2 terra" : "" """ - #checking that the output contains scaffolds still: + # set new final script name + complete_summary="${prefix}_summary_complete.txt" + + # handle -entry SCAFFOLDS + scaffold_entry_file() { + cat <<> \${complete_summary} + PASSED: Using Scaffold entry no corruption check run on R1. + PASSED: Using Scaffold entry no corruption check run on R2. + PASSED: Using Scaffold entry no paired reads to check. + PASSED: Using Scaffold entry no trimd reads to check. + FAILED: No scaffolds in ${prefix} after filtering! + EOT + } + + # checking that the output contains scaffolds still: if grep "Output: 0 reads (0.00%) 0 bases (0.00%)" ${bbmap_log}; then #Check if the file exists already (it won't with -entry SCAFFOLDS) - if [ -f ${prefix}_summary_old_3.txt ]; then - #replace end of line with actual error message - sed -i 's/End_of_File/FAILED: No scaffolds in ${prefix} after filtering!/' ${fairy_read_count_outcome_file} + if [ -f ${fairy_read_count_outcome} ]; then + # replace end of line with actual error message + cp ${fairy_read_count_outcome} \${complete_summary} + sed -i 's/End_of_File/FAILED: No scaffolds in ${prefix} after filtering!/' \${complete_summary} else - echo "PASSED: Using Scaffold entry no corruption check run on R1." > ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no corruption check run on R2." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no paired reads to check." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no trimd reads to check." >> ${prefix}_summary_old_3.txt - echo "FAILED: No scaffolds in ${prefix} after filtering!" >> ${prefix}_summary_old_3.txt + scaffold_entry_file + echo "FAILED: No scaffolds in ${prefix} after filtering!" >> \${complete_summary} fi # if the sample has no scaffolds left make the summaryline and synopsis file for it. 
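    # (hedged note, not part of the original patch) 'cat <<>' in scaffold_entry_file()
    # above is not valid shell redirection; following the <<-END_VERSIONS heredoc
    # style used elsewhere in this module, the intended form was presumably:
    #   cat <<-EOT >> \${complete_summary}
    # Downstream this summary is read with splitCsv(strip:true, by:5) and filtered on
    # 'PASSED: More than 0 scaffolds', so it must end up exactly five lines; since
    # the heredoc already ends with the FAILED line, the extra echo in the branch
    # above appears to write a sixth. The ${script_summary} call below also appears
    # to drop the line continuation after '-x ${prefix}.tax', which would truncate
    # the command.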
# get taxa ID - ${ica_bash}determine_taxID.sh -r $kraken2_trimd_summary -s ${prefix} -d $nodes_file -m $names_file + ${script_id} -r $kraken2_trimd_summary -s ${prefix} -d $nodes_file -m $names_file - #write synopsis file - ${ica_bash}pipeline_stats_writer.sh -d ${prefix} -q ${prefix}.tax -5 $coverage $raw_qc $fastp_total_qc_pipeline_stats \\ - $kraken2_trimd_report $kraken2_trimd_summary_pipeline_stats $krona_trimd $terra + # write synopsis file + ${script_writer} \\ + -d ${prefix} \\ + -q ${prefix}.tax \\ + -5 $coverage \\ + $raw_qc \\ + $fastp_total_qc_pipeline_stats \\ + $kraken2_trimd_report \\ + $kraken2_trimd_summary_pipeline_stats \\ + $krona_trimd $terra # write summary_line file - ${ica_python}Phoenix_summary_line.py -n ${prefix} -s ${prefix}.synopsis -x ${prefix}.tax -o ${prefix}_summaryline.tsv\\ - $kraken2_trimd_summary_summaryline $fastp_total_qc_summaryline $extended_qc_arg + ${script_summary} \\ + -n ${prefix} \\ + -s ${prefix}.synopsis \\ + -x ${prefix}.tax + -o ${prefix}_summaryline.tsv\\ + $kraken2_trimd_summary_summaryline \\ + $fastp_total_qc_summaryline \\ + $extended_qc_arg # change pass to fail and add in error - ${ica_python}edit_line_summary.py -i ${prefix}_summaryline.tsv - - #change file name. - cp ${prefix}_summary_old_3.txt ${prefix}_summary.txt + ${script_edit} -i ${prefix}_summaryline.tsv # if there are scaffolds left after filtering do the following... else #Check if the file exists already (it won't with -entry SCAFFOLDS) - if [ -f ${prefix}_summary_old_3.txt ]; then + if [ -f ${fairy_read_count_outcome} ]; then #replace end of line with actual error message - sed -i 's/End_of_File/PASSED: More than 0 scaffolds in ${prefix} after filtering./' ${fairy_read_count_outcome_file} + cp ${fairy_read_count_outcome} \${complete_summary} + sed -i 's/End_of_File/PASSED: More than 0 scaffolds in ${prefix} after filtering./' \${complete_summary} else - echo "PASSED: Using Scaffold entry no corruption check run on R1." > ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no corruption check run on R2." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no paired reads to check." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no trimd reads to check." >> ${prefix}_summary_old_3.txt - echo "PASSED: More than 0 scaffolds in ${prefix} after filtering." >> ${prefix}_summary_old_3.txt + scaffold_entry_file + echo "PASSED: More than 0 scaffolds in ${prefix} after filtering." 
>> \${complete_summary} fi - cp ${prefix}_summary_old_3.txt ${prefix}_summary.txt fi #gettings script versions - dettaxid_version=\$(${ica_bash}determine_taxID.sh -V) - pipestats_version=\$(${ica_bash}pipeline_stats_writer.sh -V) + dettaxid_version=\$(${script_id} -V) + pipestats_version=\$(${script_writer} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -109,8 +120,8 @@ process SCAFFOLD_COUNT_CHECK { phoenix_base_container: ${container} \${dettaxid_version} \${pipestats_version} - Phoenix_summary_line.py: \$(${ica_python}Phoenix_summary_line.py --version ) - edit_line_summary.py: \$(${ica_python}edit_line_summary.py --version ) + Phoenix_summary_line.py: \$(${script_summary} --version ) + edit_line_summary.py: \$(${script_edit} --version ) END_VERSIONS """ -} +} \ No newline at end of file diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index a05229d0..f1a4e128 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -211,26 +211,26 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions) - // // Removing scaffolds <500bp - // BBMAP_REFORMAT ( - // RENAME_FASTA_HEADERS.out.renamed_scaffolds - // ) - // ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions) - - // // Combine bbmap log with the fairy outcome file - // scaffold_check_ch = BBMAP_REFORMAT.out.log.map{meta, log -> [[id:meta.id], log]}\ - // .join(GET_TRIMD_STATS.out.outcome_to_edit.map{ meta, outcome_to_edit -> [[id:meta.id], outcome_to_edit]}, by: [0])\ - // .join(GET_RAW_STATS.out.combined_raw_stats.map{meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\ - // .join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0])\ - // .join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ - // .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ - // .join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0]) - - // // Checking that there are still scaffolds left after filtering - // SCAFFOLD_COUNT_CHECK ( - // scaffold_check_ch, false, params.coverage, params.nodes, params.names - // ) - // ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions) + // Removing scaffolds <500bp + BBMAP_REFORMAT ( + RENAME_FASTA_HEADERS.out.renamed_scaffolds + ) + ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions) + + // Combine bbmap log with the fairy outcome file + scaffold_check_ch = BBMAP_REFORMAT.out.log.map{ meta, log -> [[id:meta.id], log]}\ + .join(GET_TRIMD_STATS.out.outcome.map{ meta, outcome -> [[id:meta.id], outcome]}, by: [0])\ + .join(GET_RAW_STATS.out.combined_raw_stats.map{ meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\ + .join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0]) + + // Checking that there are still scaffolds left after filtering + SCAFFOLD_COUNT_CHECK ( + scaffold_check_ch, params.extended_qc, params.coverage, params.nodes, params.names + ) + ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions) // //combing scaffolds with scaffold check information 
to ensure processes that need scaffolds only run when there are scaffolds in the file // filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]} From 2fe0069cc81ec6f4e566237e793b10c3e7d2b5ee Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 02:57:04 +0000 Subject: [PATCH 24/40] refactor: create extended_qc variable #151 --- nextflow.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nextflow.config b/nextflow.config index 827c48f1..660127e1 100755 --- a/nextflow.config +++ b/nextflow.config @@ -23,6 +23,8 @@ params { // Additional input parameters for -entry SCAFFOLDS and CDC_SCAFFOLDS indir = null scaffolds_ext = '.scaffolds.fa.gz' + extended_qc = false + // Params for filtering minlength = 500 From 3059e03515369f1bdc2519b15645e69d7f3fb112 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 13:27:57 +0000 Subject: [PATCH 25/40] refactor: filtering #147 --- subworkflows/local/spades_failure.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/spades_failure.nf b/subworkflows/local/spades_failure.nf index 878b73bd..c5a70acc 100755 --- a/subworkflows/local/spades_failure.nf +++ b/subworkflows/local/spades_failure.nf @@ -85,7 +85,8 @@ workflow SPADES_WF { // Combining weighted kraken report with the FastANI hit based on meta.id best_hit_ch = k2_bh_summary.map{ meta, ksummary -> [[id:meta.id], ksummary]}\ .join(SPADES.out.spades_outcome.splitCsv(strip:true).map{meta, spades_outcome -> [[id:meta.id], spades_outcome]}) - + .filter { it[2][0].contains('run_failure') || it[2][1].contains('no_scaffolds') || it[2][2].contains('no_contigs')} + // Getting ID from either FastANI or if fails, from Kraken2 DETERMINE_TAXA_ID_FAILURE ( best_hit_ch, params.nodes, params.names From 7720ec4004eb67bac062db298c4cd4e4538cf329 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 13:42:50 +0000 Subject: [PATCH 26/40] refactor: move filtering to workflow level #147 --- modules/local/gamma.nf | 4 -- modules/local/mash_distance.nf | 4 -- modules/local/prokka.nf | 16 +----- modules/local/quast.nf | 4 -- workflows/phoenix.nf | 96 +++++++++++++++++----------------- 5 files changed, 51 insertions(+), 73 deletions(-) diff --git a/modules/local/gamma.nf b/modules/local/gamma.nf index 93c8a1db..1cc433d7 100755 --- a/modules/local/gamma.nf +++ b/modules/local/gamma.nf @@ -15,10 +15,6 @@ process GAMMA { tuple val(meta), path("*.fasta"), optional:true , emit: fasta path "versions.yml" , emit: versions - when: - //if there are scaffolds left after filtering - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." - script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/mash_distance.nf b/modules/local/mash_distance.nf index af7d7ac9..f969b95b 100755 --- a/modules/local/mash_distance.nf +++ b/modules/local/mash_distance.nf @@ -11,10 +11,6 @@ process MASH_DIST { tuple val(meta), path("*.txt"), emit: dist path("versions.yml") , emit: versions - when: - //if there are scaffolds left after filtering - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." 
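    // (descriptive aside, not part of the original patch) the retired guard above
    // compared fairy_outcome[4], the fifth line of the summary file; in the workflow
    // the same list is rebuilt with
    //   .splitCsv(strip:true, by:5)
    // where each summary line parses as a one-field CSV row (a single-element list),
    // which is why the workflow mapping indexes fairy_outcome[n][0] before filtering
    // on the 'PASSED: More than 0 scaffolds' line.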
- script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/prokka.nf b/modules/local/prokka.nf index ceee5c6e..c3902891 100755 --- a/modules/local/prokka.nf +++ b/modules/local/prokka.nf @@ -24,27 +24,15 @@ process PROKKA { tuple val(meta), path("*.tsv"), emit: tsv path "versions.yml" , emit: versions - when: - //if there are scaffolds left after filtering - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." - script: - //set up for terra - if (params.terra==false) { - terra = "" - terra_exit = "" - } else if (params.terra==true) { - terra = "PATH=/opt/conda/envs/prokka/bin:\$PATH" - terra_exit = """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """ - } else { - error "Please set params.terra to either \"true\" or \"false\"" - } //define variables def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def proteins_opt = proteins ? "--proteins ${proteins[0]}" : "" def prodigal_opt = prodigal_tf ? "--prodigaltf ${prodigal_tf[0]}" : "" def container = task.container.toString() - "staphb/prokka@" + def terra = params.terra ? "PATH=/opt/conda/envs/prokka/bin:\$PATH" : "" + def terra_exit = params.terra ? """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """ : "" """ #adding python path for running busco on terra $terra diff --git a/modules/local/quast.nf b/modules/local/quast.nf index 59bf2726..92e3df1b 100755 --- a/modules/local/quast.nf +++ b/modules/local/quast.nf @@ -11,10 +11,6 @@ process QUAST { tuple val(meta), path('*.tsv') , emit: report_tsv path "versions.yml" , emit: versions - when: - //if the files are not corrupt and there are equal number of reads in each file then run bbduk - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." 
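    // (illustrative, not part of the original patch) the terra toggle in PROKKA above
    // amounts to a scoped PATH swap inside the script block; in plain bash terms:
    //   PATH=/opt/conda/envs/prokka/bin:$PATH                                    # enter the env
    //   PATH="$(printf '%s\n' "$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')"  # leave it
    // the sed strip removes the first occurrence, so it assumes the prefix was not
    // already present on the incoming PATH.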
- script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index f1a4e128..0153ab07 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -232,56 +232,64 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions) - // //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file - // filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]} - // .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0]) + //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file + filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]} + .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0]) + .filter { it[2].findAll {it.contains('PASSED: More than 0 scaffolds')}} + + // Running gamma to identify hypervirulence genes in scaffolds + GAMMA_HV ( + filtered_scaffolds_ch, params.hvgamdb + ) + ch_versions = ch_versions.mix(GAMMA_HV.out.versions) - // // Running gamma to identify hypervirulence genes in scaffolds - // GAMMA_HV ( - // filtered_scaffolds_ch, params.hvgamdb - // ) - // ch_versions = ch_versions.mix(GAMMA_HV.out.versions) + // Running gamma to identify AR genes in scaffolds + GAMMA_AR ( + filtered_scaffolds_ch, params.ardb + ) + ch_versions = ch_versions.mix(GAMMA_AR.out.versions) - // // Running gamma to identify AR genes in scaffolds - // GAMMA_AR ( - // filtered_scaffolds_ch, params.ardb - // ) - // ch_versions = ch_versions.mix(GAMMA_AR.out.versions) + GAMMA_PF ( + filtered_scaffolds_ch, params.gamdbpf + ) + ch_versions = ch_versions.mix(GAMMA_PF.out.versions) - // GAMMA_PF ( - // filtered_scaffolds_ch, params.gamdbpf - // ) - // ch_versions = ch_versions.mix(GAMMA_PF.out.versions) + // Getting Assembly Stats + QUAST ( + filtered_scaffolds_ch + ) + ch_versions = ch_versions.mix(QUAST.out.versions) - // // Getting Assembly Stats - // QUAST ( - // filtered_scaffolds_ch - // ) - // ch_versions = ch_versions.mix(QUAST.out.versions) + // get gff and protein files for amrfinder+ + PROKKA ( + filtered_scaffolds_ch, [], [] + ) + ch_versions = ch_versions.mix(PROKKA.out.versions) - // // Creating krona plots and best hit files for weighted assembly - // KRAKEN2_WTASMBLD ( - // BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads" - // ) - // ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions) + // Creating krona plots and best hit files for weighted assembly + KRAKEN2_WTASMBLD ( + BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads" + ) + ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions) - // // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file - // mash_dist_ch = 
filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch) + // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file + mash_dist_ch = filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch) - // // Running Mash distance to get top 20 matches for fastANI to speed things up - // MASH_DIST ( - // mash_dist_ch - // ) - // ch_versions = ch_versions.mix(MASH_DIST.out.versions) + // Running Mash distance to get top 20 matches for fastANI to speed things up + MASH_DIST ( + mash_dist_ch + ) + ch_versions = ch_versions.mix(MASH_DIST.out.versions) - // // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id - // top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0]) + // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id + top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0]) - // // Generate file with list of paths of top taxa for fastANI - // DETERMINE_TOP_MASH_HITS ( - // top_mash_hits_ch - // ) - // ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions) + // Generate file with list of paths of top taxa for fastANI + DETERMINE_TOP_MASH_HITS ( + top_mash_hits_ch + ) + ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions) // // Combining filtered scaffolds with the top taxa list based on meta.id // top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}\ @@ -322,12 +330,6 @@ workflow PHOENIX_EXTERNAL { // ) // ch_versions = ch_versions.mix(DO_MLST.out.versions) - // // get gff and protein files for amrfinder+ - // PROKKA ( - // filtered_scaffolds_ch, [], [] - // ) - // ch_versions = ch_versions.mix(PROKKA.out.versions) - // /*// Fetch AMRFinder Database // AMRFINDERPLUS_UPDATE( ) // ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/ From c22897ca9e4dd7041083613b9b5dbb7da1eeda98 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 13:51:50 +0000 Subject: [PATCH 27/40] refactor: scaffolds samplesshet ica #149 --- modules/local/scaffolds_samplesheet_check.nf | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/local/scaffolds_samplesheet_check.nf b/modules/local/scaffolds_samplesheet_check.nf index cd701ae0..084bf95b 100644 --- a/modules/local/scaffolds_samplesheet_check.nf +++ b/modules/local/scaffolds_samplesheet_check.nf @@ -12,22 +12,19 @@ process SCAFFOLDS_SAMPLESHEET_CHECK { path "versions.yml", emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/check_assembly_samplesheet.py" : "check_assembly_samplesheet.py" """ - ${ica}check_assembly_samplesheet.py \\ + ${script} \\ $samplesheet \\ samplesheet.valid.csv cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - check_assembly_samplesheet.py: \$(${ica}check_assembly_samplesheet.py --version ) + check_assembly_samplesheet.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS From ef6339bbbd22184d28e6b6c5dc9574b1e32ecf16 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 14:01:11 +0000 Subject: [PATCH 28/40] refactor: kraken2 makereport, top mash hits ica #149 --- modules/local/determine_top_mash_hits.nf | 14 ++++---------- modules/local/krakentools_makekreport.nf | 7 ++----- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/modules/local/determine_top_mash_hits.nf b/modules/local/determine_top_mash_hits.nf index 7b809a56..94f47217 100644 --- a/modules/local/determine_top_mash_hits.nf +++ b/modules/local/determine_top_mash_hits.nf @@ -17,25 +17,19 @@ process DETERMINE_TOP_MASH_HITS { "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-t terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def sample_name = "${mash_dists}" - ".txt" //get full sample name with REFSEQ_DATE def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "${params.ica_path}/sort_and_prep_dist.sh" : "sort_and_prep_dist.sh" + def terra = params.terra ? "-t terra" : "" """ mkdir reference_dir - ${ica}sort_and_prep_dist.sh -a $assembly_scaffolds -x $mash_dists -o reference_dir $terra + ${script} -a $assembly_scaffolds -x $mash_dists -o reference_dir $terra - script_version=\$(${ica}sort_and_prep_dist.sh -V) + script_version=\$(${script} -V) if [[ ! -f ${sample_name}_best_MASH_hits.txt ]]; then echo "No MASH hit found" > ${sample_name}_best_MASH_hits.txt diff --git a/modules/local/krakentools_makekreport.nf b/modules/local/krakentools_makekreport.nf index 508d1566..b86b66bc 100644 --- a/modules/local/krakentools_makekreport.nf +++ b/modules/local/krakentools_makekreport.nf @@ -13,17 +13,14 @@ process KRAKENTOOLS_MAKEKREPORT { script: // This script is bundled with the pipeline, in phoenix/bin/ // This script has to be run with kraken output that does not use --use-names flag https://github.com/jenniferlu717/KrakenTools/issues/29 - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. 
- if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def krakentools_version = "1.2" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/make_kreport.py" : "make_kreport.py" """ - ${ica}make_kreport.py \\ + ${script} \\ --input ${kraken_output} \\ --output ${prefix}.kraken2_wtasmbld.summary.txt \\ --taxonomy ${kraken2db_path}/ktaxonomy.tsv \\ From 5ab72ca9187a7d537da9c270d67bb55c49994e75 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 14:01:40 +0000 Subject: [PATCH 29/40] refactor: phoenix wf filtering, ica #147 #149 --- workflows/phoenix.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 0153ab07..6d93ec9a 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -291,10 +291,10 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions) - // // Combining filtered scaffolds with the top taxa list based on meta.id - // top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}\ - // .join(DETERMINE_TOP_MASH_HITS.out.top_taxa_list.map{ meta, top_taxa_list -> [[id:meta.id], top_taxa_list ]}, by: [0])\ - // .join(DETERMINE_TOP_MASH_HITS.out.reference_dir.map{ meta, reference_dir -> [[id:meta.id], reference_dir ]}, by: [0]) + // Combining filtered scaffolds with the top taxa list based on meta.id + top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}\ + .join(DETERMINE_TOP_MASH_HITS.out.top_taxa_list.map{ meta, top_taxa_list -> [[id:meta.id], top_taxa_list ]}, by: [0])\ + .join(DETERMINE_TOP_MASH_HITS.out.reference_dir.map{ meta, reference_dir -> [[id:meta.id], reference_dir ]}, by: [0]) // // Getting species ID // FASTANI ( From fd2463ac891332c9a2ad9ccbf522556585c8dfff Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 14:07:16 +0000 Subject: [PATCH 30/40] refactor: determine taxaID, fast ani ica #149 --- modules/local/determine_taxa_id.nf | 9 +++---- modules/local/format_ANI_best_hit.nf | 14 +++------- workflows/phoenix.nf | 38 ++++++++++++++-------------- 3 files changed, 26 insertions(+), 35 deletions(-) diff --git a/modules/local/determine_taxa_id.nf b/modules/local/determine_taxa_id.nf index 5f826a76..b5cf971f 100644 --- a/modules/local/determine_taxa_id.nf +++ b/modules/local/determine_taxa_id.nf @@ -14,20 +14,17 @@ process DETERMINE_TAXA_ID { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" // -r needs to be last as in -entry SCAFFOLDS/CDC_SCAFFOLDS k2_bh_summary is not passed so its a blank argument def k2_bh_file = k2_bh_summary ? 
"-r $k2_bh_summary" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh" """ - ${ica}determine_taxID.sh -k $kraken_weighted -s $meta.id -f $formatted_ani_file -d $nodes_file -m $names_file $k2_bh_file + ${script} -k $kraken_weighted -s $meta.id -f $formatted_ani_file -d $nodes_file -m $names_file $k2_bh_file - script_version=\$(${ica}determine_taxID.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/format_ANI_best_hit.nf b/modules/local/format_ANI_best_hit.nf index bafb13e5..fd706a63 100644 --- a/modules/local/format_ANI_best_hit.nf +++ b/modules/local/format_ANI_best_hit.nf @@ -12,18 +12,12 @@ process FORMAT_ANI { path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-t terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "${params.ica_path}/ANI_best_hit_formatter.sh" : "ANI_best_hit_formatter.sh" + def terra = params.terra ? 
"-t terra" : "" """ line=\$(head -n1 ${ani_file}) if [[ "\${line}" == "Mash/FastANI Error:"* ]]; then @@ -35,10 +29,10 @@ process FORMAT_ANI { db_version="REFSEQ_unknown" fi # script also checks that match is 80 or > otherwise an error is thrown - ${ica}ANI_best_hit_formatter.sh -a ${ani_file} -n ${prefix} -d \${db_version} ${terra} + ${script} -a ${ani_file} -n ${prefix} -d \${db_version} ${terra} fi - script_version=\$(${ica}ANI_best_hit_formatter.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 6d93ec9a..335ed968 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -296,28 +296,28 @@ workflow PHOENIX_EXTERNAL { .join(DETERMINE_TOP_MASH_HITS.out.top_taxa_list.map{ meta, top_taxa_list -> [[id:meta.id], top_taxa_list ]}, by: [0])\ .join(DETERMINE_TOP_MASH_HITS.out.reference_dir.map{ meta, reference_dir -> [[id:meta.id], reference_dir ]}, by: [0]) - // // Getting species ID - // FASTANI ( - // top_taxa_list_ch - // ) - // ch_versions = ch_versions.mix(FASTANI.out.versions) + // Getting species ID + FASTANI ( + top_taxa_list_ch + ) + ch_versions = ch_versions.mix(FASTANI.out.versions) - // // Reformat ANI headers - // FORMAT_ANI ( - // FASTANI.out.ani - // ) - // ch_versions = ch_versions.mix(FORMAT_ANI.out.versions) + // Reformat ANI headers + FORMAT_ANI ( + FASTANI.out.ani + ) + ch_versions = ch_versions.mix(FORMAT_ANI.out.versions) - // // Combining weighted kraken report with the FastANI hit based on meta.id - // best_hit_ch = KRAKEN2_WTASMBLD.out.k2_bh_summary.map{meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}\ - // .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit ]}, by: [0])\ - // .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary ]}, by: [0]) + // Combining weighted kraken report with the FastANI hit based on meta.id + best_hit_ch = KRAKEN2_WTASMBLD.out.k2_bh_summary.map{meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}\ + .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit ]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary ]}, by: [0]) - // // Getting ID from either FastANI or if fails, from Kraken2 - // DETERMINE_TAXA_ID ( - // best_hit_ch, params.nodes, params.names - // ) - // ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions) + // Getting ID from either FastANI or if fails, from Kraken2 + DETERMINE_TAXA_ID ( + best_hit_ch, params.nodes, params.names + ) + ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions) // // Perform MLST steps on isolates (with srst2 on internal samples) // DO_MLST ( From b05647854b11b4a7d022a7199d9ecba9d6972fe8 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 14:22:52 +0000 Subject: [PATCH 31/40] refactor: mlst ica #149 --- modules/local/check_mlst_with_srst2.nf | 11 ++++------- modules/local/get_mlst_srst2.nf | 9 +++------ modules/local/mlst.nf | 5 +---- workflows/phoenix.nf | 20 ++++++++++---------- 4 files changed, 18 insertions(+), 27 deletions(-) diff --git a/modules/local/check_mlst_with_srst2.nf b/modules/local/check_mlst_with_srst2.nf index eb2476d3..753e41d2 100644 --- a/modules/local/check_mlst_with_srst2.nf +++ b/modules/local/check_mlst_with_srst2.nf @@ -17,18 +17,15 @@ process CHECK_MLST_WITH_SRST2 { task.ext.when == null || task.ext.when script: - // Adding if/else for if running on ICA it is a 
requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/fix_MLST2.py" : "fix_MLST2.py" """ if [[ "${status[0]}" == "True" ]]; then - ${ica}fix_MLST2.py --input $mlst_file --srst2 $srst2_file --taxonomy $taxonomy_file --mlst_database $local_dbases + ${script} --input $mlst_file --srst2 $srst2_file --taxonomy $taxonomy_file --mlst_database $local_dbases elif [[ "${status[0]}" == "False" ]]; then - ${ica}fix_MLST2.py --input $mlst_file --taxonomy $taxonomy_file --mlst_database $local_dbases + ${script} --input $mlst_file --taxonomy $taxonomy_file --mlst_database $local_dbases else echo "Something went very wrong, please open an issue on Github for the PHoeNIx developers to address." fi @@ -36,7 +33,7 @@ process CHECK_MLST_WITH_SRST2 { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - fix_MLST2.py: \$(${ica}fix_MLST2.py --version ) + fix_MLST2.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/get_mlst_srst2.nf b/modules/local/get_mlst_srst2.nf index 24e66b6d..b8316d77 100755 --- a/modules/local/get_mlst_srst2.nf +++ b/modules/local/get_mlst_srst2.nf @@ -21,13 +21,10 @@ process GET_MLST_SRST2 { (task.ext.when == null || task.ext.when) //& "${status[0]}" == "False" script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = task.container.toString() - "quay.io/biocontainers/python@" + def script = params.ica ? 
"python ${params.ica_path}/local_MLST_converter.py" : "local_MLST_converter.py" """ if [[ "${status[0]}" == "False" ]]; then genus="empty" @@ -47,7 +44,7 @@ process GET_MLST_SRST2 { echo "\${genus}___\${species}" # Old way, now use provided DB with different name format # convert_taxonomy_with_complexes_to_pubMLST.py --genus "\${genus}" --species "\${species}" > DB_defs.txt - ${ica}local_MLST_converter.py --genus "\${genus}" --species "\${species}" > DB_defs.txt + ${script} --genus "\${genus}" --species "\${species}" > DB_defs.txt dbline=\$(tail -n1 DB_defs.txt) echo "\$dbline" @@ -100,7 +97,7 @@ process GET_MLST_SRST2 { cat <<-END_VERSIONS > versions.yml "${task.process}": - local_MLST_converter.py: \$(${ica}local_MLST_converter.py --version ) + local_MLST_converter.py: \$(${script} --version ) python: \$(python --version | sed 's/Python //g') python_container: ${container_version} END_VERSIONS diff --git a/modules/local/mlst.nf b/modules/local/mlst.nf index ed14b0f9..4a324c62 100644 --- a/modules/local/mlst.nf +++ b/modules/local/mlst.nf @@ -16,10 +16,6 @@ process MLST { "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." script: - // helps set correct paths to get database version being used - if (params.terra==false) { terra = false } - else if (params.terra==true) { terra = true} - else { error "Please set params.terra to either \"true\" or \"false\""} //define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -27,6 +23,7 @@ process MLST { def container = task.container.toString() - "quay.io/jvhagey/mlst@" def mlst_version = "2.23.0_01242024" def mlst_version_clean = mlst_version.split("_")[0] + def terra = params.terra ? "true" : "false" """ if [[ ${fasta} = *.gz ]] then diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 335ed968..4d8d7b07 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -319,16 +319,16 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions) - // // Perform MLST steps on isolates (with srst2 on internal samples) - // DO_MLST ( - // BBMAP_REFORMAT.out.filtered_scaffolds, \ - // SCAFFOLD_COUNT_CHECK.out.outcome, \ - // FASTP_TRIMD.out.reads, \ - // DETERMINE_TAXA_ID.out.taxonomy, \ - // ASSET_CHECK.out.mlst_db, \ - // false - // ) - // ch_versions = ch_versions.mix(DO_MLST.out.versions) + // Perform MLST steps on isolates (with srst2 on internal samples) + DO_MLST ( + BBMAP_REFORMAT.out.filtered_scaffolds, \ + SCAFFOLD_COUNT_CHECK.out.outcome, \ + FASTP_TRIMD.out.reads, \ + DETERMINE_TAXA_ID.out.taxonomy, \ + ASSET_CHECK.out.mlst_db, \ + params.run_srst2_mlst + ) + ch_versions = ch_versions.mix(DO_MLST.out.versions) // /*// Fetch AMRFinder Database // AMRFINDERPLUS_UPDATE( ) From 8c768434532a6f331f651112c3575b371761ffc2 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 14:44:31 +0000 Subject: [PATCH 32/40] refactor: mlst, amrfinder for terra, ica #149 --- modules/local/check_mlst.nf | 8 ++-- modules/local/get_taxa_for_amrfinder.nf | 7 +--- modules/local/run_amrfinder.nf | 17 ++------ workflows/phoenix.nf | 52 ++++++++++++------------- 4 files changed, 34 insertions(+), 50 deletions(-) diff --git a/modules/local/check_mlst.nf b/modules/local/check_mlst.nf index 8c02f03a..d84beead 100644 --- a/modules/local/check_mlst.nf +++ b/modules/local/check_mlst.nf @@ -12,9 +12,6 @@ process CHECK_MLST { tuple val(meta), path("*_status.txt"), emit: status path("versions.yml") , emit: versions - when: - task.ext.when == null || 
task.ext.when - script: // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. if (params.ica==false) { ica = "" } @@ -22,13 +19,14 @@ process CHECK_MLST { else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/fix_MLST2.py" : "fix_MLST2.py" """ - ${ica}fix_MLST2.py --input $mlst_file --taxonomy $taxonomy_file --mlst_database ${local_dbases} + ${script} --input $mlst_file --taxonomy $taxonomy_file --mlst_database ${local_dbases} cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - fix_MLST2.py: \$(${ica}fix_MLST2.py --version ) + fix_MLST2.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/get_taxa_for_amrfinder.nf b/modules/local/get_taxa_for_amrfinder.nf index 906b26ae..af6f46dc 100644 --- a/modules/local/get_taxa_for_amrfinder.nf +++ b/modules/local/get_taxa_for_amrfinder.nf @@ -12,16 +12,13 @@ process GET_TAXA_FOR_AMRFINDER { path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/get_taxa_for_amrfinder.py" : "get_taxa_for_amrfinder.py" """ - ${ica}get_taxa_for_amrfinder.py -t $taxa_file -o ${prefix}_AMRFinder_Organism.csv + ${script} -t $taxa_file -o ${prefix}_AMRFinder_Organism.csv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_amrfinder.nf b/modules/local/run_amrfinder.nf index e6964722..4d41e09f 100755 --- a/modules/local/run_amrfinder.nf +++ b/modules/local/run_amrfinder.nf @@ -18,24 +18,13 @@ process AMRFINDERPLUS_RUN { task.ext.when == null || task.ext.when script: - // use --organism - if ( "${organism_param[0]}" != "No Match Found") { - organism = "--organism ${organism_param[0]}" - } else { organism = "" } - //set up for terra - if (params.terra==false) { - terra = "" - terra_exit = "" - } else if (params.terra==true) { - terra = "PATH=/opt/conda/envs/amrfinderplus/bin:\$PATH" - terra_exit = """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/amrfinderplus/bin:||')" """ - } else { - error "Please set params.terra to either \"true\" or \"false\"" - } // define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def container = task.container.toString() - "staphb/ncbi-amrfinderplus@" + def terra = params.terra ? "PATH=/opt/conda/envs/amrfinderplus/bin:\$PATH" : "" + def terra_exit = params.terra ? """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/amrfinderplus/bin:||')" """ : "" + def organism = "${organism_param[0]}" != "No Match Found" ? 
"--organism ${organism_param[0]}" : "" //get name of amrfinder database file db_name = db.toString() - '.tar.gz' """ diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 4d8d7b07..00233ed0 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -330,37 +330,37 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(DO_MLST.out.versions) - // /*// Fetch AMRFinder Database - // AMRFINDERPLUS_UPDATE( ) - // ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/ + /*// Fetch AMRFinder Database + AMRFINDERPLUS_UPDATE( ) + ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/ - // // Create file that has the organism name to pass to AMRFinder - // GET_TAXA_FOR_AMRFINDER ( - // DETERMINE_TAXA_ID.out.taxonomy - // ) - // ch_versions = ch_versions.mix(GET_TAXA_FOR_AMRFINDER.out.versions) + // Create file that has the organism name to pass to AMRFinder + GET_TAXA_FOR_AMRFINDER ( + DETERMINE_TAXA_ID.out.taxonomy + ) + ch_versions = ch_versions.mix(GET_TAXA_FOR_AMRFINDER.out.versions) - // // Combining taxa and scaffolds to run amrfinder and get the point mutations. - // amr_channel = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, reads -> [[id:meta.id], reads]}\ - // .join(GET_TAXA_FOR_AMRFINDER.out.amrfinder_taxa.splitCsv(strip:true).map{meta, amrfinder_taxa -> [[id:meta.id], amrfinder_taxa ]}, by: [0])\ - // .join(PROKKA.out.faa.map{ meta, faa -> [[id:meta.id], faa ]}, by: [0])\ - // .join(PROKKA.out.gff.map{ meta, gff -> [[id:meta.id], gff ]}, by: [0]) + // Combining taxa and scaffolds to run amrfinder and get the point mutations. + amr_channel = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, reads -> [[id:meta.id], reads]}\ + .join(GET_TAXA_FOR_AMRFINDER.out.amrfinder_taxa.splitCsv(strip:true).map{meta, amrfinder_taxa -> [[id:meta.id], amrfinder_taxa ]}, by: [0])\ + .join(PROKKA.out.faa.map{ meta, faa -> [[id:meta.id], faa ]}, by: [0])\ + .join(PROKKA.out.gff.map{ meta, gff -> [[id:meta.id], gff ]}, by: [0]) - // // Run AMRFinder - // AMRFINDERPLUS_RUN ( - // amr_channel, params.amrfinder_db - // ) - // ch_versions = ch_versions.mix(AMRFINDERPLUS_RUN.out.versions) + // Run AMRFinder + AMRFINDERPLUS_RUN ( + amr_channel, params.amrfinder_db + ) + ch_versions = ch_versions.mix(AMRFINDERPLUS_RUN.out.versions) - // // Combining determined taxa with the assembly stats based on meta.id - // assembly_ratios_ch = DETERMINE_TAXA_ID.out.taxonomy.map{meta, taxonomy -> [[id:meta.id], taxonomy]}\ - // .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) + // Combining determined taxa with the assembly stats based on meta.id + assembly_ratios_ch = DETERMINE_TAXA_ID.out.taxonomy.map{meta, taxonomy -> [[id:meta.id], taxonomy]}\ + .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) - // // Calculating the assembly ratio and gather GC% stats - // CALCULATE_ASSEMBLY_RATIO ( - // assembly_ratios_ch, params.ncbi_assembly_stats - // ) - // ch_versions = ch_versions.mix(CALCULATE_ASSEMBLY_RATIO.out.versions) + // Calculating the assembly ratio and gather GC% stats + CALCULATE_ASSEMBLY_RATIO ( + assembly_ratios_ch, params.ncbi_assembly_stats + ) + ch_versions = ch_versions.mix(CALCULATE_ASSEMBLY_RATIO.out.versions) // GENERATE_PIPELINE_STATS_WF ( // GET_RAW_STATS.out.combined_raw_stats, \ From 2700945ba7bf073b714e5f09fce100c63948f969 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Wed, 3 Apr 2024 15:43:16 +0000 Subject: [PATCH 33/40] refactor: summary lines ica #149 --- 
modules/local/phoenix_summary.nf | 9 +-- modules/local/phoenix_summary_line.nf | 9 +-- workflows/phoenix.nf | 108 +++++++++++++++----------- 3 files changed, 68 insertions(+), 58 deletions(-) diff --git a/modules/local/phoenix_summary.nf b/modules/local/phoenix_summary.nf index a7250efc..1b268c1b 100644 --- a/modules/local/phoenix_summary.nf +++ b/modules/local/phoenix_summary.nf @@ -13,23 +13,20 @@ process GATHER_SUMMARY_LINES { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def busco_parameter = busco_val ? "--busco" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/Create_phoenix_summary_tsv.py" : "Create_phoenix_summary_tsv.py" """ - ${ica}Create_phoenix_summary_tsv.py \\ + ${script} \\ --out Phoenix_Summary.tsv \\ $busco_parameter cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - Create_phoenix_summary_tsv.py: \$(${ica}Create_phoenix_summary_tsv.py --version ) + Create_phoenix_summary_tsv.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/phoenix_summary_line.nf b/modules/local/phoenix_summary_line.nf index 8effa698..e4aefef1 100644 --- a/modules/local/phoenix_summary_line.nf +++ b/modules/local/phoenix_summary_line.nf @@ -23,10 +23,6 @@ process CREATE_SUMMARY_LINE { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" // allowing for some optional parameters for -entry SCAFFOLDS/CDC_SCAFFOLDS nothing should be passed. @@ -35,8 +31,9 @@ process CREATE_SUMMARY_LINE { def fastani_file = fastani ? "-f $fastani" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.oy" """ - ${ica}Phoenix_summary_line.py \\ + ${script} \\ -q $quast_report \\ $trimmed_qc_data \\ -a $ar_gamma_file \\ @@ -55,7 +52,7 @@ process CREATE_SUMMARY_LINE { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - Phoenix_summary_line.py: \$(${ica}Phoenix_summary_line.py --version ) + Phoenix_summary_line.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 00233ed0..4646b080 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -362,52 +362,68 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(CALCULATE_ASSEMBLY_RATIO.out.versions) - // GENERATE_PIPELINE_STATS_WF ( - // GET_RAW_STATS.out.combined_raw_stats, \ - // GET_TRIMD_STATS.out.fastp_total_qc, \ - // [], \ - // KRAKEN2_TRIMD.out.report, \ - // KRAKEN2_TRIMD.out.krona_html, \ - // KRAKEN2_TRIMD.out.k2_bh_summary, \ - // RENAME_FASTA_HEADERS.out.renamed_scaffolds, \ - // BBMAP_REFORMAT.out.filtered_scaffolds, \ - // DO_MLST.out.checked_MLSTs, \ - // GAMMA_HV.out.gamma, \ - // GAMMA_AR.out.gamma, \ - // GAMMA_PF.out.gamma, \ - // QUAST.out.report_tsv, \ - // [], [], [], [], \ - // KRAKEN2_WTASMBLD.out.report, \ - // KRAKEN2_WTASMBLD.out.krona_html, \ - // KRAKEN2_WTASMBLD.out.k2_bh_summary, \ - // DETERMINE_TAXA_ID.out.taxonomy, \ - // FORMAT_ANI.out.ani_best_hit, \ - // CALCULATE_ASSEMBLY_RATIO.out.ratio, \ - // AMRFINDERPLUS_RUN.out.mutation_report, \ - // CALCULATE_ASSEMBLY_RATIO.out.gc_content, \ - // false - // ) - // ch_versions = ch_versions.mix(GENERATE_PIPELINE_STATS_WF.out.versions) - - // // Combining output based on meta.id to create summary by sample -- is this verbose, ugly and annoying? yes, if anyone has a slicker way to do this we welcome the input. 
- // line_summary_ch = GET_TRIMD_STATS.out.fastp_total_qc.map{meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}\ - // .join(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> [[id:meta.id], checked_MLSTs]}, by: [0])\ - // .join(GAMMA_HV.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ - // .join(GAMMA_AR.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ - // .join(GAMMA_PF.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ - // .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0])\ - // .join(CALCULATE_ASSEMBLY_RATIO.out.ratio.map{ meta, ratio -> [[id:meta.id], ratio]}, by: [0])\ - // .join(GENERATE_PIPELINE_STATS_WF.out.pipeline_stats.map{ meta, pipeline_stats -> [[id:meta.id], pipeline_stats]}, by: [0])\ - // .join(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> [[id:meta.id], taxonomy]}, by: [0])\ - // .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ - // .join(AMRFINDERPLUS_RUN.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ - // .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit]}, by: [0]) - - // // Generate summary per sample that passed SPAdes - // CREATE_SUMMARY_LINE ( - // line_summary_ch - // ) - // ch_versions = ch_versions.mix(CREATE_SUMMARY_LINE.out.versions) + // prepare inputs to the stats wf + if (params.run_srst2_mlst){ + fullgene_results=SRST2_TRIMD_AR.out.fullgene_results + } else { + fullgene_results=[] + } + if (params.asmbld){ + asmbld_report=KRAKEN2_ASMBLD.out.report // channel: tuple (meta) path(report) + asmbld_krona_html=KRAKEN2_ASMBLD.out.krona_html // channel: tuple (meta) path(krona_html) + asmbld_k2_bh_summary=KRAKEN2_ASMBLD.out.k2_bh_summary // channel: tuple (meta) path(k2_bh_summary) + } else{ + asmbld_report=[] + asmbld_krona_html=[] + asmbld_k2_bh_summary=[] + } + + GENERATE_PIPELINE_STATS_WF ( + GET_RAW_STATS.out.combined_raw_stats, \ + GET_TRIMD_STATS.out.fastp_total_qc, \ + fullgene_results, \ + KRAKEN2_TRIMD.out.report, \ + KRAKEN2_TRIMD.out.krona_html, \ + KRAKEN2_TRIMD.out.k2_bh_summary, \ + RENAME_FASTA_HEADERS.out.renamed_scaffolds, \ + BBMAP_REFORMAT.out.filtered_scaffolds, \ + DO_MLST.out.checked_MLSTs, \ + GAMMA_HV.out.gamma, \ + GAMMA_AR.out.gamma, \ + GAMMA_PF.out.gamma, \ + QUAST.out.report_tsv, \ + params.busco, asmbld_report, asmbld_krona_html, asmbld_k2_bh_summary, \ + KRAKEN2_WTASMBLD.out.report, \ + KRAKEN2_WTASMBLD.out.krona_html, \ + KRAKEN2_WTASMBLD.out.k2_bh_summary, \ + DETERMINE_TAXA_ID.out.taxonomy, \ + FORMAT_ANI.out.ani_best_hit, \ + CALCULATE_ASSEMBLY_RATIO.out.ratio, \ + AMRFINDERPLUS_RUN.out.mutation_report, \ + CALCULATE_ASSEMBLY_RATIO.out.gc_content, \ + params.extended_qc + ) + ch_versions = ch_versions.mix(GENERATE_PIPELINE_STATS_WF.out.versions) + + // Combining output based on meta.id to create summary by sample -- is this verbose, ugly and annoying? yes, if anyone has a slicker way to do this we welcome the input. 
+ line_summary_ch = GET_TRIMD_STATS.out.fastp_total_qc.map{meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}\ + .join(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> [[id:meta.id], checked_MLSTs]}, by: [0])\ + .join(GAMMA_HV.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ + .join(GAMMA_AR.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ + .join(GAMMA_PF.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\ + .join(QUAST.out.report_tsv.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0])\ + .join(CALCULATE_ASSEMBLY_RATIO.out.ratio.map{ meta, ratio -> [[id:meta.id], ratio]}, by: [0])\ + .join(GENERATE_PIPELINE_STATS_WF.out.pipeline_stats.map{ meta, pipeline_stats -> [[id:meta.id], pipeline_stats]}, by: [0])\ + .join(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> [[id:meta.id], taxonomy]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ + .join(AMRFINDERPLUS_RUN.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ + .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit]}, by: [0]) + + // Generate summary per sample that passed SPAdes + CREATE_SUMMARY_LINE ( + line_summary_ch + ) + ch_versions = ch_versions.mix(CREATE_SUMMARY_LINE.out.versions) // // Collect all the summary files prior to fetch step to force the fetch process to wait // failed_summaries_ch = SPADES_WF.out.line_summary.collect().ifEmpty(params.placeholder) // if no spades failure pass empty file to keep it moving... From 9437d8d5d73e144b1c003cc54c96b1db49dcc78b Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 4 Apr 2024 03:13:10 +0000 Subject: [PATCH 34/40] chore: fix missed ica flag --- modules/local/check_mlst.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/local/check_mlst.nf b/modules/local/check_mlst.nf index d84beead..1ee95fdc 100644 --- a/modules/local/check_mlst.nf +++ b/modules/local/check_mlst.nf @@ -14,9 +14,6 @@ process CHECK_MLST { script: // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" def script = params.ica ? 
"python ${params.ica_path}/fix_MLST2.py" : "fix_MLST2.py" From bb780f33f12b1c2a602bf705de0e2c4981742862 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 4 Apr 2024 03:14:34 +0000 Subject: [PATCH 35/40] refactor: summary lines ica #149 --- modules/local/phoenix_summary.nf | 1 - modules/local/phoenix_summary_line.nf | 2 +- workflows/phoenix.nf | 57 ++++++++++++++------------- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/modules/local/phoenix_summary.nf b/modules/local/phoenix_summary.nf index 1b268c1b..39f42ef4 100644 --- a/modules/local/phoenix_summary.nf +++ b/modules/local/phoenix_summary.nf @@ -5,7 +5,6 @@ process GATHER_SUMMARY_LINES { input: path(summary_line_files) - path(outdir_path) val(busco_val) output: diff --git a/modules/local/phoenix_summary_line.nf b/modules/local/phoenix_summary_line.nf index e4aefef1..c4d7fc29 100644 --- a/modules/local/phoenix_summary_line.nf +++ b/modules/local/phoenix_summary_line.nf @@ -31,7 +31,7 @@ process CREATE_SUMMARY_LINE { def fastani_file = fastani ? "-f $fastani" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" - def script = params.ica ? "python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.oy" + def script = params.ica ? "python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.py" """ ${script} \\ -q $quast_report \\ diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 4646b080..55b073e8 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -392,7 +392,7 @@ workflow PHOENIX_EXTERNAL { GAMMA_AR.out.gamma, \ GAMMA_PF.out.gamma, \ QUAST.out.report_tsv, \ - params.busco, asmbld_report, asmbld_krona_html, asmbld_k2_bh_summary, \ + params.run_busco, asmbld_report, asmbld_krona_html, asmbld_k2_bh_summary, \ KRAKEN2_WTASMBLD.out.report, \ KRAKEN2_WTASMBLD.out.krona_html, \ KRAKEN2_WTASMBLD.out.k2_bh_summary, \ @@ -425,33 +425,36 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(CREATE_SUMMARY_LINE.out.versions) - // // Collect all the summary files prior to fetch step to force the fetch process to wait - // failed_summaries_ch = SPADES_WF.out.line_summary.collect().ifEmpty(params.placeholder) // if no spades failure pass empty file to keep it moving... - // // If you only run one sample and it fails spades there is nothing in the create line summary so pass an empty list to keep it moving... - // summaries_ch = CREATE_SUMMARY_LINE.out.line_summary.collect().ifEmpty( [] ) + // Collect all the summary files prior to fetch step to force the fetch process to wait + failed_summaries_ch = SPADES_WF.out.line_summary.collect().ifEmpty(params.placeholder) // if no spades failure pass empty file to keep it moving... + // If you only run one sample and it fails spades there is nothing in the create line summary so pass an empty list to keep it moving... 
summaries_ch = CREATE_SUMMARY_LINE.out.line_summary.collect().ifEmpty( [] )
+
+        // This will check the output directory for any files ending in "_summaryline_failure.tsv" and add them to the output channel
+        FETCH_FAILED_SUMMARIES (
+            outdir_path, failed_summaries_ch, summaries_ch
+        )
+        ch_versions = ch_versions.mix(FETCH_FAILED_SUMMARIES.out.versions)
+
+        // combine all line summaries into one channel
+        spades_failure_summaries_ch = FETCH_FAILED_SUMMARIES.out.spades_failure_summary_line
+        fairy_summary_ch = CORRUPTION_CHECK.out.summary_line.collect().ifEmpty( [] )\
+            .combine(GET_RAW_STATS.out.summary_line.collect().ifEmpty( [] ))\
+            .combine(GET_TRIMD_STATS.out.summary_line.collect().ifEmpty( [] ))\
+            .combine(SCAFFOLD_COUNT_CHECK.out.summary_line.collect().ifEmpty( [] ))\
+            .ifEmpty( [] )
+
+        // pulling it all together
+        all_summaries_ch = spades_failure_summaries_ch
+            .combine(failed_summaries_ch)
+            .combine(summaries_ch)
+            .combine(fairy_summary_ch)
+
+        // Combining sample summaries into final report
+        GATHER_SUMMARY_LINES (
+            all_summaries_ch, params.run_busco
+        )
+        ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions)

From a33862285000ca2da04ab89541bbd398e44d69a6 Mon Sep 17 00:00:00 2001
From: slsevilla
Date: Thu, 4 Apr 2024 03:35:53 +0000
Subject: [PATCH 36/40] feat: flags for execution #153

---
 nextflow.config | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 660127e1..bb7d2165 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -18,14 +18,14 @@ params {
 
     // Run flags
     run_busco = false
-
+    run_srst2_mlst = false
+    run_griphin = false
 
     // Additional input parameters for -entry SCAFFOLDS and CDC_SCAFFOLDS
     indir = null
     scaffolds_ext = '.scaffolds.fa.gz'
     extended_qc = false
-
     // Params for filtering
     minlength = 500
     phred = 33
@@ -37,9 +37,10 @@ params {
     // Additional input parameters for -entry SRA and CDC_SRA
     input_sra = null
     use_sra = false
+    asmbld = false
 
     // For NCBI spreadsheet creation
-    ncbi_excel_creation = true
+    ncbi_excel_creation = false
     microbe_example = "${baseDir}/assets/Microbe.1.0_Example_Data.xlsx"
     sra_metadata = "${baseDir}/assets/SRA_metadata_example.xlsx"
     osii_bioprojects = "${baseDir}/assets/osii-bioprojects.yaml"

From 28f902916cd0de2da9e4b6a6dcb62333bb7cc563 Mon Sep 17 00:00:00 2001
From: slsevilla
Date: Thu, 4 Apr 2024 03:36:10 +0000
Subject: 
[PATCH 37/40] refactor: griphin ica #149 --- modules/local/griphin.nf | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/local/griphin.nf b/modules/local/griphin.nf index e64e9133..1d20535c 100755 --- a/modules/local/griphin.nf +++ b/modules/local/griphin.nf @@ -18,23 +18,20 @@ process GRIPHIN { path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def phoenix = entry ? "--phoenix" : "" def scaffolds = scaffolds_entry ? "--scaffolds" : "" def container = task.container.toString() - "quay.io/jvhagey/phoenix:" + def script = params.ica ? "python ${params.ica_path}/GRiPHin.py" : "GRiPHin.py" """ full_path=\$(readlink -f ${outdir}) - ${ica}GRiPHin.py -d \$full_path -a $db --output ${outdir} --coverage ${coverage} ${phoenix} ${scaffolds} + ${script} -d \$full_path -a $db --output ${outdir} --coverage ${coverage} ${phoenix} ${scaffolds} cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - griphin.py: \$(${ica}GRiPHin.py --version) + griphin.py: \$(${script} --version) phoenix_base_container: ${container} END_VERSIONS """ From 2168e14cce593529cb38cc1c0262a5c920c0d22c Mon Sep 17 00:00:00 2001 From: slsevilla Date: Tue, 9 Apr 2024 03:28:13 +0000 Subject: [PATCH 38/40] chore: unblock expected outputs --- main.nf | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/main.nf b/main.nf index 4635262c..83f89e31 100755 --- a/main.nf +++ b/main.nf @@ -53,20 +53,19 @@ workflow PHOENIX { main: PHOENIX_EXTERNAL ( ch_input, ch_versions, params.ncbi_excel_creation ) emit: - check = PHOENIX_EXTERNAL.out.check - // scaffolds = PHOENIX_EXTERNAL.out.scaffolds - // trimmed_reads = PHOENIX_EXTERNAL.out.trimmed_reads - // mlst = PHOENIX_EXTERNAL.out.mlst - // amrfinder_output = PHOENIX_EXTERNAL.out.amrfinder_output - // gamma_ar = PHOENIX_EXTERNAL.out.gamma_ar - // phx_summary = PHOENIX_EXTERNAL.out.phx_summary - // //output for phylophoenix - // griphin_tsv = PHOENIX_EXTERNAL.out.griphin_tsv - // griphin_excel = PHOENIX_EXTERNAL.out.griphin_excel - // dir_samplesheet = PHOENIX_EXTERNAL.out.dir_samplesheet - // //output for ncbi upload - // ncbi_sra_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_sra_sheet : null - // ncbi_biosample_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_biosample_sheet : null + scaffolds = PHOENIX_EXTERNAL.out.scaffolds + trimmed_reads = PHOENIX_EXTERNAL.out.trimmed_reads + mlst = PHOENIX_EXTERNAL.out.mlst + amrfinder_output = PHOENIX_EXTERNAL.out.amrfinder_output + gamma_ar = PHOENIX_EXTERNAL.out.gamma_ar + phx_summary = PHOENIX_EXTERNAL.out.phx_summary + //output for phylophoenix + griphin_tsv = params.run_griphin ? PHOENIX_EXTERNAL.out.griphin_tsv : null + griphin_excel = params.run_griphin ? PHOENIX_EXTERNAL.out.griphin_excel : null + dir_samplesheet = params.run_griphin ? PHOENIX_EXTERNAL.out.dir_samplesheet : null + //output for ncbi upload + ncbi_sra_sheet = params.create_ncbi_sheet ? 
PHOENIX_EXTERNAL.out.ncbi_sra_sheet : null + ncbi_biosample_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_biosample_sheet : null } // From 4556847eb9505e5dd8c16b34726b6f2b8c868e62 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Tue, 9 Apr 2024 03:29:14 +0000 Subject: [PATCH 39/40] chore: unblock griphin, outputs --- workflows/phoenix.nf | 62 +++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 55b073e8..a4b3feeb 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -450,21 +450,31 @@ workflow PHOENIX_EXTERNAL { .combine(summaries_ch) .combine(fairy_summary_ch) - // Combining sample summaries into final report - GATHER_SUMMARY_LINES ( - all_summaries_ch, params.run_busco - ) - ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions) - - // //create GRiPHin report - // GRIPHIN ( - // all_summaries_ch, INPUT_CHECK.out.valid_samplesheet, params.ardb, outdir_path, params.coverage, true, false + // // Combining sample summaries into final report + // GATHER_SUMMARY_LINES ( + // all_summaries_ch, params.run_busco // ) - // ch_versions = ch_versions.mix(GRIPHIN.out.versions) + // ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions) - // if (ncbi_excel_creation == true && params.create_ncbi_sheet == true) { + // //create GRiPHin report + // if(params.run_griphin) { + // GRIPHIN ( + // all_summaries_ch, INPUT_CHECK.out.valid_samplesheet, params.ardb, outdir_path, params.coverage, true, false + // ) + // ch_versions = ch_versions.mix(GRIPHIN.out.versions) + // } + + // // Create NCBI sheet + // if (params.ncbi_excel_creation == true && params.create_ncbi_sheet == true) { // // requiring files so that this process doesn't start until needed files are made. - // required_files_ch = FASTP_TRIMD.out.reads.map{ meta, reads -> reads[0]}.collect().combine(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> checked_MLSTs}.collect()).combine(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> taxonomy}.collect()) + // required_files_ch = FASTP_TRIMD.out.reads + // .map{ meta, reads -> reads[0]} + // .collect() + // .combine(DO_MLST.out.checked_MLSTs + // .map{ meta, checked_MLSTs -> checked_MLSTs} + // .collect()) + // .combine(DETERMINE_TAXA_ID.out.taxonomy + // .map{ meta, taxonomy -> taxonomy}.collect()) // //Fill out NCBI excel sheets for upload based on what PHX found // CREATE_NCBI_UPLOAD_SHEET ( @@ -504,21 +514,19 @@ workflow PHOENIX_EXTERNAL { // ch_versions = ch_versions.mix(MULTIQC.out.versions) emit: - check = FASTP_TRIMD.out.reads - // outcome = GET_RAW_STATS.out.outcome - // scaffolds = BBMAP_REFORMAT.out.filtered_scaffolds - // trimmed_reads = FASTP_TRIMD.out.reads - // mlst = DO_MLST.out.checked_MLSTs - // amrfinder_output = AMRFINDERPLUS_RUN.out.report - // gamma_ar = GAMMA_AR.out.gamma - // phx_summary = GATHER_SUMMARY_LINES.out.summary_report - // //output for phylophoenix - // griphin_tsv = GRIPHIN.out.griphin_report - // griphin_excel = GRIPHIN.out.griphin_tsv_report - // dir_samplesheet = GRIPHIN.out.converted_samplesheet - // //output for ncbi upload - // ncbi_sra_sheet = params.create_ncbi_sheet ? CREATE_NCBI_UPLOAD_SHEET.out.ncbi_sra : null - // ncbi_biosample_sheet = params.create_ncbi_sheet ? 
CREATE_NCBI_UPLOAD_SHEET.out.ncbi_biosample : null
}

/*

From 46f43a4537157d796b534b8b717c572b5f44c921 Mon Sep 17 00:00:00 2001
From: slsevilla
Date: Tue, 9 Apr 2024 03:34:38 +0000
Subject: [PATCH 40/40] docs: changes added to log

---
 CHANGELOG.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55874b35..d4e04661 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -243,3 +243,23 @@
 - [ARG-ANNOT](http://backup.mediterranee-infection.com/arkotheque/client/ihumed/_depot_arko/articles/2041/arg-annot-v4-aa-may2018_doc.fasta) hasn't changed since the last time the database was created and contains updates since version [NT v6 July 2019](https://www.mediterranee-infection.com/acces-ressources/base-de-donnees/arg-annot-2/)
 - [ResFinder](https://bitbucket.org/genomicepidemiology/resfinder_db/src/master/)
   - Includes until 2024-01-28 [commit 97d1fe0cd0a119172037f6bdb29f8a1c7c6e6019](https://bitbucket.org/genomicepidemiology/resfinder_db/commits/branch/master)
+
+## [v3.1.0](https://github.com/CDCgov/phoenix/releases/tag/v3.1.0) (04/08/2024)
+**Implemented Enhancements**
+- refactors filtering of failed samples for fairy
+- refactors ICA and Terra handling
+- adds param flags in nextflow.config
+  - execution-based
+    - run_busco
+    - ncbi_excel_creation
+    - extended_qc
+    - run_srst2_mlst
+    - run_griphin
+  - feature-based
+    - save_trimmed_fail
+    - save_merged
+    - save_output_fastqs
+    - save_reads_assignment
+- moves parameter checks upstream to main.nf
+  - ICA
+  - TERRA
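
For reference, a minimal sketch of how the new v3.1.0 execution flags might be toggled from a user-supplied configuration file (the file name `my_params.config` is hypothetical; the parameter names come from the `nextflow.config` changes in this patch series, where they all default to false):

```groovy
// my_params.config -- hypothetical user config; a sketch, not part of the patches.
// Every parameter below is declared in nextflow.config by this patch series.
params {
    run_busco      = true   // enable the optional BUSCO steps
    run_srst2_mlst = true   // enable the srst2 mode passed to DO_MLST (PATCH 31)
    run_griphin    = true   // emit the GRiPHin report outputs (PATCH 38/39)
    ica            = false  // set to true only on ICA, with params.ica_path pointing at the pipeline's bin/ scripts
}
```

Such a file would be passed with Nextflow's standard `-c` option, e.g. `nextflow run main.nf -c my_params.config`.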