diff --git a/CHANGELOG.md b/CHANGELOG.md index 55874b35..d4e04661 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -243,3 +243,23 @@ Below are the list of changes to phx since is initial release. As fixes can take - [ARG-ANNOT](http://backup.mediterranee-infection.com/arkotheque/client/ihumed/_depot_arko/articles/2041/arg-annot-v4-aa-may2018_doc.fasta) hasn't changed since the last time the database was created and contains updates since version [NT v6 July 2019](https://www.mediterranee-infection.com/acces-ressources/base-de-donnees/arg-annot-2/) - [ResFinder](https://bitbucket.org/genomicepidemiology/resfinder_db/src/master/) - Includes until 2024-01-28 [commit 97d1fe0cd0a119172037f6bdb29f8a1c7c6e6019](https://bitbucket.org/genomicepidemiology/resfinder_db/commits/branch/master) + +## [v3.1.0](https://github.com/CDCgov/phoenix/releases/tag/v3.1.0) (04/08/2024) +**Implemented Enhancements** +- refactors filtering failed samples for fairy +- refactors ICA handling, terra handling +- adds param flags in nextflow.config + - execution-based + - run_busco + - ncbi_excel_creation + - extended_qc + - run_srst2_mlst + - run_griphin + - feature-based + - save_trimmed_fail + - save_merged + - save_output_fastqs + - save_reads_assignment +- moves parameter checks upstream to main.nf + - ICA + - TERRA diff --git a/conf/modules.config b/conf/modules.config index 1e91a084..62982ad7 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -43,7 +43,7 @@ process { [ path: { "${params.outdir}/${meta.id}/file_integrity" }, mode: 'copy', - pattern: "*{_summary.txt}" + pattern: "*{_summary_fairy.txt}" ] ] } @@ -67,7 +67,7 @@ process { [ path: { "${params.outdir}/${meta.id}/file_integrity" }, mode: 'copy', - pattern: "*{_summary.txt}" + pattern: "*{_summary_rawstats.txt}" ] ] } diff --git a/main.nf b/main.nf index 50b9daef..83f89e31 100755 --- a/main.nf +++ b/main.nf @@ -43,15 +43,15 @@ workflow PHOENIX { // Check input path parameters to see if they exist def checkPathParamList 
= [ params.input, params.multiqc_config, params.kraken2db] //removed , params.fasta to stop issue w/connecting to aws and igenomes not used for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - - // Check mandatory parameters + if (params.ica != true && params.ica != false) {exit 1, "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods."} + if (params.terra != true && params.terra != false) {exit 1, "Please set params.terra to either \"true\" if running on terra or \"false\" for all other methods."} //input on command line if (params.input) { ch_input = file(params.input) } else { exit 1, 'For -entry PHOENIX: Input samplesheet not specified!' } ch_versions = Channel.empty() // Used to collect the software versions - + main: - PHOENIX_EXTERNAL ( ch_input, ch_versions, true ) + PHOENIX_EXTERNAL ( ch_input, ch_versions, params.ncbi_excel_creation ) emit: scaffolds = PHOENIX_EXTERNAL.out.scaffolds trimmed_reads = PHOENIX_EXTERNAL.out.trimmed_reads @@ -60,9 +60,9 @@ workflow PHOENIX { gamma_ar = PHOENIX_EXTERNAL.out.gamma_ar phx_summary = PHOENIX_EXTERNAL.out.phx_summary //output for phylophoenix - griphin_tsv = PHOENIX_EXTERNAL.out.griphin_tsv - griphin_excel = PHOENIX_EXTERNAL.out.griphin_excel - dir_samplesheet = PHOENIX_EXTERNAL.out.dir_samplesheet + griphin_tsv = params.run_griphin ? PHOENIX_EXTERNAL.out.griphin_tsv : null + griphin_excel = params.run_griphin ? PHOENIX_EXTERNAL.out.griphin_excel : null + dir_samplesheet = params.run_griphin ? PHOENIX_EXTERNAL.out.dir_samplesheet : null //output for ncbi upload ncbi_sra_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_sra_sheet : null ncbi_biosample_sheet = params.create_ncbi_sheet ? PHOENIX_EXTERNAL.out.ncbi_biosample_sheet : null @@ -83,6 +83,9 @@ workflow CDC_PHOENIX { if (params.input) { ch_input = file(params.input) } else { exit 1, 'For -entry CDC_PHOENIX: Input samplesheet not specified!' 
} ch_versions = Channel.empty() // Used to collect the software versions + // true is for -entry CDC_PHOENIX and CDC_SCAFFOLDS - used in SPADES + extended_qc=false + main: PHOENIX_EXQC ( ch_input, ch_versions, true ) diff --git a/modules/local/bbduk.nf b/modules/local/bbduk.nf index b409dbbb..e4b1e8f9 100755 --- a/modules/local/bbduk.nf +++ b/modules/local/bbduk.nf @@ -13,10 +13,6 @@ process BBDUK { tuple val(meta), path('*.log') , emit: log path "versions.yml" , emit: versions - when: - //if the files are not corrupt and there are equal number of reads in each file then run bbduk - "${fairy_outcome[0]}" == "PASSED: File ${meta.id}_R1 is not corrupt." && "${fairy_outcome[1]}" == "PASSED: File ${meta.id}_R2 is not corrupt." && "${fairy_outcome[2]}" == "PASSED: Read pairs for ${meta.id} are equal." - script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/check_mlst.nf b/modules/local/check_mlst.nf index 8c02f03a..1ee95fdc 100644 --- a/modules/local/check_mlst.nf +++ b/modules/local/check_mlst.nf @@ -12,23 +12,18 @@ process CHECK_MLST { tuple val(meta), path("*_status.txt"), emit: status path("versions.yml") , emit: versions - when: - task.ext.when == null || task.ext.when - script: // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/fix_MLST2.py" : "fix_MLST2.py" """ - ${ica}fix_MLST2.py --input $mlst_file --taxonomy $taxonomy_file --mlst_database ${local_dbases} + ${script} --input $mlst_file --taxonomy $taxonomy_file --mlst_database ${local_dbases} cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - fix_MLST2.py: \$(${ica}fix_MLST2.py --version ) + fix_MLST2.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/check_mlst_with_srst2.nf b/modules/local/check_mlst_with_srst2.nf index eb2476d3..753e41d2 100644 --- a/modules/local/check_mlst_with_srst2.nf +++ b/modules/local/check_mlst_with_srst2.nf @@ -17,18 +17,15 @@ process CHECK_MLST_WITH_SRST2 { task.ext.when == null || task.ext.when script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/fix_MLST2.py" : "fix_MLST2.py" """ if [[ "${status[0]}" == "True" ]]; then - ${ica}fix_MLST2.py --input $mlst_file --srst2 $srst2_file --taxonomy $taxonomy_file --mlst_database $local_dbases + ${script} --input $mlst_file --srst2 $srst2_file --taxonomy $taxonomy_file --mlst_database $local_dbases elif [[ "${status[0]}" == "False" ]]; then - ${ica}fix_MLST2.py --input $mlst_file --taxonomy $taxonomy_file --mlst_database $local_dbases + ${script} --input $mlst_file --taxonomy $taxonomy_file --mlst_database $local_dbases else echo "Something went very wrong, please open an issue on Github for the PHoeNIx developers to address." fi @@ -36,7 +33,7 @@ process CHECK_MLST_WITH_SRST2 { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - fix_MLST2.py: \$(${ica}fix_MLST2.py --version ) + fix_MLST2.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/determine_taxa_id.nf b/modules/local/determine_taxa_id.nf index 5f826a76..b5cf971f 100644 --- a/modules/local/determine_taxa_id.nf +++ b/modules/local/determine_taxa_id.nf @@ -14,20 +14,17 @@ process DETERMINE_TAXA_ID { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" // -r needs to be last as in -entry SCAFFOLDS/CDC_SCAFFOLDS k2_bh_summary is not passed so its a blank argument def k2_bh_file = k2_bh_summary ? 
"-r $k2_bh_summary" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh" """ - ${ica}determine_taxID.sh -k $kraken_weighted -s $meta.id -f $formatted_ani_file -d $nodes_file -m $names_file $k2_bh_file + ${script} -k $kraken_weighted -s $meta.id -f $formatted_ani_file -d $nodes_file -m $names_file $k2_bh_file - script_version=\$(${ica}determine_taxID.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/determine_taxa_id_failure.nf b/modules/local/determine_taxa_id_failure.nf index e7aa7b82..6a11e57b 100644 --- a/modules/local/determine_taxa_id_failure.nf +++ b/modules/local/determine_taxa_id_failure.nf @@ -17,18 +17,15 @@ process DETERMINE_TAXA_ID_FAILURE { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"bash ${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh" """ - ${ica}determine_taxID.sh -r $k2_bh_summary -s $meta.id -d $nodes_file -m $names_file + ${script} -r $k2_bh_summary -s $meta.id -d $nodes_file -m $names_file - script_version=\$(${ica}determine_taxID.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/determine_top_mash_hits.nf b/modules/local/determine_top_mash_hits.nf index 7b809a56..94f47217 100644 --- a/modules/local/determine_top_mash_hits.nf +++ b/modules/local/determine_top_mash_hits.nf @@ -17,25 +17,19 @@ process DETERMINE_TOP_MASH_HITS { "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-t terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def sample_name = "${mash_dists}" - ".txt" //get full sample name with REFSEQ_DATE def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "${params.ica_path}/sort_and_prep_dist.sh" : "sort_and_prep_dist.sh" + def terra = params.terra ? 
"-t terra" : "" """ mkdir reference_dir - ${ica}sort_and_prep_dist.sh -a $assembly_scaffolds -x $mash_dists -o reference_dir $terra + ${script} -a $assembly_scaffolds -x $mash_dists -o reference_dir $terra - script_version=\$(${ica}sort_and_prep_dist.sh -V) + script_version=\$(${script} -V) if [[ ! -f ${sample_name}_best_MASH_hits.txt ]]; then echo "No MASH hit found" > ${sample_name}_best_MASH_hits.txt diff --git a/modules/local/fairy_corruption_check.nf b/modules/local/fairy_corruption_check.nf index b09c675b..104c9c6b 100644 --- a/modules/local/fairy_corruption_check.nf +++ b/modules/local/fairy_corruption_check.nf @@ -9,17 +9,12 @@ process CORRUPTION_CHECK { val(busco_val) output: - tuple val(meta), path('*_summary.txt'), emit: outcome - tuple val(meta), path('*_summary_old.txt'), emit: outcome_to_edit + tuple val(meta), path('*_summary_fairy.txt'), emit: outcome path('*_summaryline.tsv'), optional:true, emit: summary_line tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis path("versions.yml"), emit: versions script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def num1 = "${reads[0]}".minus(".fastq.gz") @@ -27,17 +22,18 @@ process CORRUPTION_CHECK { def busco_parameter = busco_val ? "-b" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" - """ + def script = params.ica ? 
"python ${params.ica_path}/fairy_proc.sh" : "fairy_proc.sh" +""" #set +e #check for file integrity and log errors #if there is a corruption problem the script will create a *_summaryline.tsv and *.synopsis file for the sample. - ${ica}fairy_proc.sh -r ${reads[0]} -p ${prefix} ${busco_parameter} - ${ica}fairy_proc.sh -r ${reads[1]} -p ${prefix} ${busco_parameter} + ${script} -r ${reads[0]} -p ${prefix} ${busco_parameter} + ${script} -r ${reads[1]} -p ${prefix} ${busco_parameter} - script_version=\$(${ica}fairy_proc.sh -V) + script_version=\$(${script} -V) #making a copy of the summary file to pass to READ_COUNT_CHECKS to handle file names being the same - cp ${prefix}_summary.txt ${prefix}_summary_old.txt + mv ${prefix}_summary.txt ${prefix}_summary_fairy.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/fairy_scaffold_count_check.nf b/modules/local/fairy_scaffold_count_check.nf index b7f914d2..30a67c45 100644 --- a/modules/local/fairy_scaffold_count_check.nf +++ b/modules/local/fairy_scaffold_count_check.nf @@ -17,25 +17,12 @@ process SCAFFOLD_COUNT_CHECK { path(names_file) output: - tuple val(meta), path('*_summary.txt'), emit: outcome - path('*_summaryline.tsv'), optional:true, emit: summary_line - tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis - path("versions.yml"), emit: versions + tuple val(meta), path('*_summary_complete.txt'), emit: outcome + path('*_summaryline.tsv'), optional:true, emit: summary_line + tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis + path("versions.yml"), emit: versions script: - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-2 terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any 
directory. - if (params.ica==false) { - ica_python = "" - ica_bash = "" - } else if (params.ica==true) { - ica_python = "python ${workflow.launchDir}/bin/" - ica_bash = "bash ${workflow.launchDir}/bin/" - } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def fairy_read_count_outcome_file = fairy_read_count_outcome ? "$fairy_read_count_outcome" : "" @@ -49,58 +36,82 @@ process SCAFFOLD_COUNT_CHECK { def extended_qc_arg = extended_qc ? "--extended_qc" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script_id = params.ica ? "bash ${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh" + def script_writer = params.ica ? "bash ${params.ica_path}/pipeline_stats_writer.sh" : "pipeline_stats_writer.sh" + def script_summary = params.ica ? "python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.py" + def script_edit = params.ica ? "python ${params.ica_path}/edit_line_summary.py" : "edit_line_summary.py" + def terra = params.terra ? "-2 terra" : "" """ - #checking that the output contains scaffolds still: + # set new final script name + complete_summary="${prefix}_summary_complete.txt" + + # handle -entry SCAFFOLDS + # writes only the four read-level lines; each caller appends the scaffold verdict as line five + scaffold_entry_file() { + cat <<EOT > \${complete_summary} + PASSED: Using Scaffold entry no corruption check run on R1. + PASSED: Using Scaffold entry no corruption check run on R2. + PASSED: Using Scaffold entry no paired reads to check. + PASSED: Using Scaffold entry no trimd reads to check. 
+ EOT + } + + # checking that the output contains scaffolds still: if grep "Output: 0 reads (0.00%) 0 bases (0.00%)" ${bbmap_log}; then #Check if the file exists already (it won't with -entry SCAFFOLDS) - if [ -f ${prefix}_summary_old_3.txt ]; then - #replace end of line with actual error message - sed -i 's/End_of_File/FAILED: No scaffolds in ${prefix} after filtering!/' ${fairy_read_count_outcome_file} + if [ -f "${fairy_read_count_outcome_file}" ]; then + # replace end of line with actual error message + cp "${fairy_read_count_outcome_file}" \${complete_summary} + sed -i 's/End_of_File/FAILED: No scaffolds in ${prefix} after filtering!/' \${complete_summary} else - echo "PASSED: Using Scaffold entry no corruption check run on R1." > ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no corruption check run on R2." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no paired reads to check." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no trimd reads to check." >> ${prefix}_summary_old_3.txt - echo "FAILED: No scaffolds in ${prefix} after filtering!" >> ${prefix}_summary_old_3.txt + scaffold_entry_file + echo "FAILED: No scaffolds in ${prefix} after filtering!" >> \${complete_summary} fi # if the sample has no scaffolds left make the summaryline and synopsis file for it. 
# get taxa ID - ${ica_bash}determine_taxID.sh -r $kraken2_trimd_summary -s ${prefix} -d $nodes_file -m $names_file + ${script_id} -r $kraken2_trimd_summary -s ${prefix} -d $nodes_file -m $names_file - #write synopsis file - ${ica_bash}pipeline_stats_writer.sh -d ${prefix} -q ${prefix}.tax -5 $coverage $raw_qc $fastp_total_qc_pipeline_stats \\ - $kraken2_trimd_report $kraken2_trimd_summary_pipeline_stats $krona_trimd $terra + # write synopsis file + ${script_writer} \\ + -d ${prefix} \\ + -q ${prefix}.tax \\ + -5 $coverage \\ + $raw_qc \\ + $fastp_total_qc_pipeline_stats \\ + $kraken2_trimd_report \\ + $kraken2_trimd_summary_pipeline_stats \\ + $krona_trimd $terra # write summary_line file - ${ica_python}Phoenix_summary_line.py -n ${prefix} -s ${prefix}.synopsis -x ${prefix}.tax -o ${prefix}_summaryline.tsv\\ - $kraken2_trimd_summary_summaryline $fastp_total_qc_summaryline $extended_qc_arg + ${script_summary} \\ + -n ${prefix} \\ + -s ${prefix}.synopsis \\ + -x ${prefix}.tax \\ + -o ${prefix}_summaryline.tsv \\ + $kraken2_trimd_summary_summaryline \\ + $fastp_total_qc_summaryline \\ + $extended_qc_arg # change pass to fail and add in error - ${ica_python}edit_line_summary.py -i ${prefix}_summaryline.tsv - - #change file name. - cp ${prefix}_summary_old_3.txt ${prefix}_summary.txt + ${script_edit} -i ${prefix}_summaryline.tsv # if there are scaffolds left after filtering do the following... else #Check if the file exists already (it won't with -entry SCAFFOLDS) - if [ -f ${prefix}_summary_old_3.txt ]; then + if [ -f "${fairy_read_count_outcome_file}" ]; then #replace end of line with actual error message - sed -i 's/End_of_File/PASSED: More than 0 scaffolds in ${prefix} after filtering./' ${fairy_read_count_outcome_file} + cp "${fairy_read_count_outcome_file}" \${complete_summary} + sed -i 's/End_of_File/PASSED: More than 0 scaffolds in ${prefix} after filtering./' \${complete_summary} else - echo "PASSED: Using Scaffold entry no corruption check run on R1." 
> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no corruption check run on R2." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no paired reads to check." >> ${prefix}_summary_old_3.txt - echo "PASSED: Using Scaffold entry no trimd reads to check." >> ${prefix}_summary_old_3.txt - echo "PASSED: More than 0 scaffolds in ${prefix} after filtering." >> ${prefix}_summary_old_3.txt + scaffold_entry_file + echo "PASSED: More than 0 scaffolds in ${prefix} after filtering." >> \${complete_summary} fi - cp ${prefix}_summary_old_3.txt ${prefix}_summary.txt fi #gettings script versions - dettaxid_version=\$(${ica_bash}determine_taxID.sh -V) - pipestats_version=\$(${ica_bash}pipeline_stats_writer.sh -V) + dettaxid_version=\$(${script_id} -V) + pipestats_version=\$(${script_writer} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -109,8 +120,8 @@ process SCAFFOLD_COUNT_CHECK { phoenix_base_container: ${container} \${dettaxid_version} \${pipestats_version} - Phoenix_summary_line.py: \$(${ica_python}Phoenix_summary_line.py --version ) - edit_line_summary.py: \$(${ica_python}edit_line_summary.py --version ) + Phoenix_summary_line.py: \$(${script_summary} --version ) + edit_line_summary.py: \$(${script_edit} --version ) END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/local/fastp_singles.nf b/modules/local/fastp_singles.nf index 6328f8ed..dd4a1c60 100755 --- a/modules/local/fastp_singles.nf +++ b/modules/local/fastp_singles.nf @@ -19,14 +19,11 @@ process FASTP_SINGLES { task.ext.when == null || task.ext.when script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." 
} // define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def container = task.container.toString() - "staphb/fastp@" + def script = params.ica ? "bash ${params.ica_path}/create_empty_fastp_json.sh" : "create_empty_fastp_json.sh" """ echo "Debugging: Emptiness of reads[0] and reads[1]" > debug_status.log if [[ ! -s ${reads[0]} ]] && [[ ! -s ${reads[1]} ]]; then @@ -34,7 +31,7 @@ process FASTP_SINGLES { echo "!!!!! - Both are empty" # Both are empty, do nothing??? Nope we handle now #Create psuedo file as empty aint cutting it - ${ica}create_empty_fastp_json.sh -n ${prefix} + ${script} -n ${prefix} touch "${prefix}_empty.html" touch ${prefix}.singles.fastq gzip ${prefix}.singles.fastq @@ -69,7 +66,7 @@ process FASTP_SINGLES { 2> ${prefix}.fastp.log fi - script_version=\$(${ica}create_empty_fastp_json.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/fastqc.nf b/modules/local/fastqc.nf index aa7cf58c..863971ce 100755 --- a/modules/local/fastqc.nf +++ b/modules/local/fastqc.nf @@ -7,10 +7,6 @@ process FASTQC { input: tuple val(meta), path(reads), val(fairy_outcome) - when: - //if there are scaffolds left after filtering - "${fairy_outcome[3]}" == "PASSED: There are reads in ${meta.id} R1/R2 after trimming." 
- output: tuple val(meta), path("*.html"), emit: html tuple val(meta), path("*.zip") , emit: zip diff --git a/modules/local/format_ANI_best_hit.nf b/modules/local/format_ANI_best_hit.nf index bafb13e5..fd706a63 100644 --- a/modules/local/format_ANI_best_hit.nf +++ b/modules/local/format_ANI_best_hit.nf @@ -12,18 +12,12 @@ process FORMAT_ANI { path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-t terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "bash ${params.ica_path}/ANI_best_hit_formatter.sh" : "ANI_best_hit_formatter.sh" + def terra = params.terra ? 
"-t terra" : "" """ line=\$(head -n1 ${ani_file}) if [[ "\${line}" == "Mash/FastANI Error:"* ]]; then @@ -35,10 +29,10 @@ process FORMAT_ANI { db_version="REFSEQ_unknown" fi # script also checks that match is 80 or > otherwise an error is thrown - ${ica}ANI_best_hit_formatter.sh -a ${ani_file} -n ${prefix} -d \${db_version} ${terra} + ${script} -a ${ani_file} -n ${prefix} -d \${db_version} ${terra} fi - script_version=\$(${ica}ANI_best_hit_formatter.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/gamma.nf b/modules/local/gamma.nf index 93c8a1db..1cc433d7 100755 --- a/modules/local/gamma.nf +++ b/modules/local/gamma.nf @@ -15,10 +15,6 @@ process GAMMA { tuple val(meta), path("*.fasta"), optional:true , emit: fasta path "versions.yml" , emit: versions - when: - //if there are scaffolds left after filtering - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." - script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/generate_pipeline_stats_failure.nf b/modules/local/generate_pipeline_stats_failure.nf index af4181fd..a0fa9e79 100644 --- a/modules/local/generate_pipeline_stats_failure.nf +++ b/modules/local/generate_pipeline_stats_failure.nf @@ -22,20 +22,14 @@ process GENERATE_PIPELINE_STATS_FAILURE { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-2 terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. 
- if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "bash ${params.ica_path}/pipeline_stats_writer.sh" : "pipeline_stats_writer.sh" + def terra = params.terra ? "-2 terra" : "" """ - ${ica}pipeline_stats_writer.sh \\ + ${script} \\ -a $raw_qc \\ -b $fastp_total_qc \\ -d ${prefix} \\ @@ -46,7 +40,7 @@ process GENERATE_PIPELINE_STATS_FAILURE { -5 $coverage \\ $terra - script_version=\$(${ica}pipeline_stats_writer.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/generate_pipeline_stats_failure_exqc.nf b/modules/local/generate_pipeline_stats_failure_exqc.nf index 6006d332..9cf7fabc 100644 --- a/modules/local/generate_pipeline_stats_failure_exqc.nf +++ b/modules/local/generate_pipeline_stats_failure_exqc.nf @@ -23,21 +23,15 @@ process GENERATE_PIPELINE_STATS_FAILURE_EXQC { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-2 terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. 
- if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "bash ${params.ica_path}/pipeline_stats_writer.sh" : "pipeline_stats_writer.sh" + def terra = params.terra ? "-2 terra" : "" """ # this runs with -entry CDC_PHEONIX when SPAdes fails (creates contigs and not scaffolds) - ${ica}pipeline_stats_writer.sh \\ + ${script} \\ -a $raw_qc \\ -b $fastp_total_qc \\ -d ${prefix} \\ @@ -49,7 +43,7 @@ process GENERATE_PIPELINE_STATS_FAILURE_EXQC { -5 $coverage \\ $terra - script_version=\$(${ica}pipeline_stats_writer.sh -V) + script_version=\$(${script} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/get_mlst_srst2.nf b/modules/local/get_mlst_srst2.nf index 24e66b6d..b8316d77 100755 --- a/modules/local/get_mlst_srst2.nf +++ b/modules/local/get_mlst_srst2.nf @@ -21,13 +21,10 @@ process GET_MLST_SRST2 { (task.ext.when == null || task.ext.when) //& "${status[0]}" == "False" script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = task.container.toString() - "quay.io/biocontainers/python@" + def script = params.ica ? 
"python ${params.ica_path}/local_MLST_converter.py" : "local_MLST_converter.py" """ if [[ "${status[0]}" == "False" ]]; then genus="empty" @@ -47,7 +44,7 @@ process GET_MLST_SRST2 { echo "\${genus}___\${species}" # Old way, now use provided DB with different name format # convert_taxonomy_with_complexes_to_pubMLST.py --genus "\${genus}" --species "\${species}" > DB_defs.txt - ${ica}local_MLST_converter.py --genus "\${genus}" --species "\${species}" > DB_defs.txt + ${script} --genus "\${genus}" --species "\${species}" > DB_defs.txt dbline=\$(tail -n1 DB_defs.txt) echo "\$dbline" @@ -100,7 +97,7 @@ process GET_MLST_SRST2 { cat <<-END_VERSIONS > versions.yml "${task.process}": - local_MLST_converter.py: \$(${ica}local_MLST_converter.py --version ) + local_MLST_converter.py: \$(${script} --version ) python: \$(python --version | sed 's/Python //g') python_container: ${container_version} END_VERSIONS diff --git a/modules/local/get_raw_stats.nf b/modules/local/get_raw_stats.nf index aa3add05..24f1ceef 100644 --- a/modules/local/get_raw_stats.nf +++ b/modules/local/get_raw_stats.nf @@ -11,46 +11,40 @@ process GET_RAW_STATS { output: tuple val(meta), path('*_stats.txt'), emit: raw_stats tuple val(meta), path('*_raw_read_counts.txt'), emit: combined_raw_stats - tuple val(meta), path('*_summary.txt'), emit: outcome - path('*_summaryline.tsv'), optional:true, emit: summary_line - tuple val(meta), path('*_summary_old_2.txt'), emit: outcome_to_edit - tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis + tuple val(meta), path('*_summary_rawstats.txt'), emit: outcome + path('*_summaryline.tsv'), optional:true, emit: summary_line + tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis path("versions.yml"), emit: versions - when: - //if the files are not corrupt then get the read stats - "${fairy_corrupt_outcome[0]}" == "PASSED: File ${meta.id}_R1 is not corrupt." && "${fairy_corrupt_outcome[1]}" == "PASSED: File ${meta.id}_R2 is not corrupt." 
- script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def busco_parameter = busco_val ? "--busco" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" - def path_to_bin = "${workflow.launchDir}/bin/" + def script_q30 = params.ica ? "python ${params.ica_path}/q30.py" : "q30.py" + def script_stats = params.ica ? "python ${params.ica_path}/create_raw_stats_output.py" : "create_raw_stats_output.py" + def script_fairy = params.ica ? "python ${params.ica_path}/fairy.py" : "fairy.py" """ - ${ica}q30.py -i ${reads[0]} > ${prefix}_R1_stats.txt - ${ica}q30.py -i ${reads[1]} > ${prefix}_R2_stats.txt - ${ica}create_raw_stats_output.py -n ${prefix} -r1 ${prefix}_R1_stats.txt -r2 ${prefix}_R2_stats.txt + ${script_q30} -i ${reads[0]} > ${prefix}_R1_stats.txt + ${script_q30} -i ${reads[1]} > ${prefix}_R2_stats.txt + ${script_stats} -n ${prefix} -r1 ${prefix}_R1_stats.txt -r2 ${prefix}_R2_stats.txt - ## checking that read counts match before moving on + # making a copy of the summary file - this avoids writing to the previous file + cp ${fairy_outcome} ${prefix}_input.txt # Output check for messages indicating read pairs that do not match - ${ica}fairy.py -r ${prefix}_raw_read_counts.txt -f ${fairy_outcome} ${busco_parameter} + ${script_fairy} -r ${prefix}_raw_read_counts.txt -f ${prefix}_input.txt ${busco_parameter} - #making a copy of the summary file to pass to BBMAP_REFORMAT to handle file names being the same - cp ${prefix}_summary.txt 
${prefix}_summary_old_2.txt + # rename output file + mv ${prefix}_summary.txt ${prefix}_summary_rawstats.txt cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - q30.py: \$(${ica}q30.py --version ) - create_raw_stats_output.py: \$(${ica}create_raw_stats_output.py --version ) - fairy.py: \$(${ica}fairy.py --version ) + q30.py: \$(${script_q30} --version ) + create_raw_stats_output.py: \$(${script_stats} --version ) + fairy.py: \$(${script_fairy} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/get_taxa_for_amrfinder.nf b/modules/local/get_taxa_for_amrfinder.nf index 906b26ae..af6f46dc 100644 --- a/modules/local/get_taxa_for_amrfinder.nf +++ b/modules/local/get_taxa_for_amrfinder.nf @@ -12,16 +12,13 @@ process GET_TAXA_FOR_AMRFINDER { path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/get_taxa_for_amrfinder.py" : "get_taxa_for_amrfinder.py" """ - ${ica}get_taxa_for_amrfinder.py -t $taxa_file -o ${prefix}_AMRFinder_Organism.csv + ${script} -t $taxa_file -o ${prefix}_AMRFinder_Organism.csv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/get_trimd_stats.nf b/modules/local/get_trimd_stats.nf index a14d4653..6ea2a575 100644 --- a/modules/local/get_trimd_stats.nf +++ b/modules/local/get_trimd_stats.nf @@ -13,41 +13,39 @@ process GET_TRIMD_STATS { output: tuple val(meta), path('*_trimmed_read_counts.txt'), emit: fastp_total_qc - tuple val(meta), path('*_summary.txt'), emit: outcome - path('*_summaryline.tsv'), optional:true, emit: summary_line - tuple val(meta), path('*_summary_old_3.txt'), emit: outcome_to_edit - tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis + path('*_summaryline.tsv'), optional:true, emit: summary_line + tuple val(meta), path('*_summary_fastp.txt'), emit: outcome + tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def busco_parameter = busco_val ? "--busco" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script_fastp = params.ica ? "python ${params.ica_path}/FastP_QC.py" : "FastP_QC.py" + def script_fairy = params.ica ? 
"python ${params.ica_path}/fairy.py" : "fairy.py" """ - ${ica}FastP_QC.py \\ + ${script_fastp} \\ --trimmed_json ${fastp_trimd_json} \\ --single_json ${fastp_singles_json} \\ --name ${prefix} - # Check that there are still reads in R1 and R2 before fastqc. If there aren't reads then fastqc dies. + # making a copy of the summary file - this avoids writing to the previous file + cp ${fairy_outcome} ${prefix}_input.txt # Output check for messages indicating there are no trimmed reads after filtering. - ${ica}fairy.py -r ${raw_qc} -f ${fairy_outcome} -t ${prefix}_trimmed_read_counts.txt ${busco_parameter} + ${script_fairy} -r ${raw_qc} -f ${prefix}_input.txt -t ${prefix}_trimmed_read_counts.txt ${busco_parameter} #making a copy of the summary file to pass to BBMAP_REFORMAT to handle file names being the same - cp ${prefix}_summary.txt ${prefix}_summary_old_3.txt + mv ${prefix}_summary.txt ${prefix}_summary_fastp.txt cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - fairy.py: \$( ${ica}fairy.py --version ) - FastP_QC.py: \$(${ica}FastP_QC.py --version ) + fairy.py: \$( ${script_fairy} --version ) + FastP_QC.py: \$(${script_fastp} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/griphin.nf b/modules/local/griphin.nf index e64e9133..1d20535c 100755 --- a/modules/local/griphin.nf +++ b/modules/local/griphin.nf @@ -18,23 +18,20 @@ process GRIPHIN { path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. 
- if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def phoenix = entry ? "--phoenix" : "" def scaffolds = scaffolds_entry ? "--scaffolds" : "" def container = task.container.toString() - "quay.io/jvhagey/phoenix:" + def script = params.ica ? "python ${params.ica_path}/GRiPHin.py" : "GRiPHin.py" """ full_path=\$(readlink -f ${outdir}) - ${ica}GRiPHin.py -d \$full_path -a $db --output ${outdir} --coverage ${coverage} ${phoenix} ${scaffolds} + ${script} -d \$full_path -a $db --output ${outdir} --coverage ${coverage} ${phoenix} ${scaffolds} cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - griphin.py: \$(${ica}GRiPHin.py --version) + griphin.py: \$(${script} --version) phoenix_base_container: ${container} END_VERSIONS """ diff --git a/modules/local/kraken_bh.nf b/modules/local/kraken_bh.nf index 508a3531..a5f9af65 100644 --- a/modules/local/kraken_bh.nf +++ b/modules/local/kraken_bh.nf @@ -13,22 +13,16 @@ process KRAKEN_BEST_HIT { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // terra=true sets paths for bc/wget for terra container paths - if (params.terra==false) { terra = ""} - else if (params.terra==true) { terra = "-t terra" } - else { error "Please set params.terra to either \"true\" or \"false\"" } - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." 
} // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "bash ${params.ica_path}/kraken2_best_hit.sh" : "kraken2_best_hit.sh" + def terra = params.terra ? "-t terra" : "" """ - ${ica}kraken2_best_hit.sh -i $kraken_summary -q $count_file -n ${prefix} $terra + ${script} -i $kraken_summary -q $count_file -n ${prefix} $terra - script_version=\$(${ica}kraken2_best_hit.sh -V) + script_version=\$(${script} -V) mv ${prefix}.summary.txt ${prefix}.kraken2_${kraken_type}.top_kraken_hit.txt diff --git a/modules/local/krakentools_kreport2krona.nf b/modules/local/krakentools_kreport2krona.nf index d4d3d2ae..c6bd89cf 100644 --- a/modules/local/krakentools_kreport2krona.nf +++ b/modules/local/krakentools_kreport2krona.nf @@ -13,17 +13,14 @@ process KRAKEN2_KRONA { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in phoenix/bin/ orginally from https://github.com/jenniferlu717/KrakenTools on 6/15/2022 - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def krakentools_version = "1.2" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/kreport2krona.py" : "kreport2krona.py" """ - ${ica}kreport2krona.py \\ + ${script} \\ --report ${kraken_report} \\ --output ${prefix}_${type}.krona diff --git a/modules/local/krakentools_kreport2mpa.nf b/modules/local/krakentools_kreport2mpa.nf index 01abaaa0..c14d4cb9 100644 --- a/modules/local/krakentools_kreport2mpa.nf +++ b/modules/local/krakentools_kreport2mpa.nf @@ -12,17 +12,14 @@ process KRAKENTOOLS_KREPORT2MPA { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def krakentools_version = "1.2" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/kreport2mpa.py" : "kreport2mpa.py" """ - ${ica}kreport2mpa.py \\ + ${script} \\ --report-file ${kraken_report} \\ --output ${prefix}.mpa diff --git a/modules/local/krakentools_makekreport.nf b/modules/local/krakentools_makekreport.nf index 508d1566..b86b66bc 100644 --- a/modules/local/krakentools_makekreport.nf +++ b/modules/local/krakentools_makekreport.nf @@ -13,17 +13,14 @@ process KRAKENTOOLS_MAKEKREPORT { script: // This script is bundled with the pipeline, in phoenix/bin/ // This script has to be run with kraken output that does not use --use-names flag https://github.com/jenniferlu717/KrakenTools/issues/29 - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def krakentools_version = "1.2" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/make_kreport.py" : "make_kreport.py" """ - ${ica}make_kreport.py \\ + ${script} \\ --input ${kraken_output} \\ --output ${prefix}.kraken2_wtasmbld.summary.txt \\ --taxonomy ${kraken2db_path}/ktaxonomy.tsv \\ diff --git a/modules/local/mash_distance.nf b/modules/local/mash_distance.nf index af7d7ac9..f969b95b 100755 --- a/modules/local/mash_distance.nf +++ b/modules/local/mash_distance.nf @@ -11,10 +11,6 @@ process MASH_DIST { tuple val(meta), path("*.txt"), emit: dist path("versions.yml") , emit: versions - when: - //if there are scaffolds left after filtering - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." 
- script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/mlst.nf b/modules/local/mlst.nf index ed14b0f9..4a324c62 100644 --- a/modules/local/mlst.nf +++ b/modules/local/mlst.nf @@ -16,10 +16,6 @@ process MLST { "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." script: - // helps set correct paths to get database version being used - if (params.terra==false) { terra = false } - else if (params.terra==true) { terra = true} - else { error "Please set params.terra to either \"true\" or \"false\""} //define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -27,6 +23,7 @@ process MLST { def container = task.container.toString() - "quay.io/jvhagey/mlst@" def mlst_version = "2.23.0_01242024" def mlst_version_clean = mlst_version.split("_")[0] + def terra = params.terra ? "true" : "false" """ if [[ ${fasta} = *.gz ]] then diff --git a/modules/local/phoenix_summary.nf b/modules/local/phoenix_summary.nf index a7250efc..39f42ef4 100644 --- a/modules/local/phoenix_summary.nf +++ b/modules/local/phoenix_summary.nf @@ -5,7 +5,6 @@ process GATHER_SUMMARY_LINES { input: path(summary_line_files) - path(outdir_path) val(busco_val) output: @@ -13,23 +12,20 @@ process GATHER_SUMMARY_LINES { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def busco_parameter = busco_val ? 
"--busco" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? "python ${params.ica_path}/Create_phoenix_summary_tsv.py" : "Create_phoenix_summary_tsv.py" """ - ${ica}Create_phoenix_summary_tsv.py \\ + ${script} \\ --out Phoenix_Summary.tsv \\ $busco_parameter cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - Create_phoenix_summary_tsv.py: \$(${ica}Create_phoenix_summary_tsv.py --version ) + Create_phoenix_summary_tsv.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/phoenix_summary_line.nf b/modules/local/phoenix_summary_line.nf index 8effa698..c4d7fc29 100644 --- a/modules/local/phoenix_summary_line.nf +++ b/modules/local/phoenix_summary_line.nf @@ -23,10 +23,6 @@ process CREATE_SUMMARY_LINE { path("versions.yml") , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" // allowing for some optional parameters for -entry SCAFFOLDS/CDC_SCAFFOLDS nothing should be passed. @@ -35,8 +31,9 @@ process CREATE_SUMMARY_LINE { def fastani_file = fastani ? "-f $fastani" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.py" """ - ${ica}Phoenix_summary_line.py \\ + ${script} \\ -q $quast_report \\ $trimmed_qc_data \\ -a $ar_gamma_file \\ @@ -55,7 +52,7 @@ process CREATE_SUMMARY_LINE { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - Phoenix_summary_line.py: \$(${ica}Phoenix_summary_line.py --version ) + Phoenix_summary_line.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/phoenix_summary_line_failure.nf b/modules/local/phoenix_summary_line_failure.nf index 3f63c7dd..d46eabd7 100644 --- a/modules/local/phoenix_summary_line_failure.nf +++ b/modules/local/phoenix_summary_line_failure.nf @@ -20,17 +20,14 @@ process CREATE_SUMMARY_LINE_FAILURE { "${spades_outcome[0]}" == "run_failure" || "${spades_outcome[1]}" == "no_scaffolds" || "${spades_outcome[2]}" == "no_contigs" script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def extended_qc_arg = extended_qc ? "--extended_qc" : "" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.py" """ - ${ica}Phoenix_summary_line.py \\ + ${script} \\ -n ${prefix} \\ -k $trimd_ksummary \\ -t $fastp_total_qc \\ @@ -42,7 +39,7 @@ process CREATE_SUMMARY_LINE_FAILURE { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - Phoenix_summary_line.py: \$(${ica}Phoenix_summary_line.py --version ) + Phoenix_summary_line.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/prokka.nf b/modules/local/prokka.nf index ceee5c6e..c3902891 100755 --- a/modules/local/prokka.nf +++ b/modules/local/prokka.nf @@ -24,27 +24,15 @@ process PROKKA { tuple val(meta), path("*.tsv"), emit: tsv path "versions.yml" , emit: versions - when: - //if there are scaffolds left after filtering - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." - script: - //set up for terra - if (params.terra==false) { - terra = "" - terra_exit = "" - } else if (params.terra==true) { - terra = "PATH=/opt/conda/envs/prokka/bin:\$PATH" - terra_exit = """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """ - } else { - error "Please set params.terra to either \"true\" or \"false\"" - } //define variables def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def proteins_opt = proteins ? "--proteins ${proteins[0]}" : "" def prodigal_opt = prodigal_tf ? "--prodigaltf ${prodigal_tf[0]}" : "" def container = task.container.toString() - "staphb/prokka@" + def terra = params.terra ? "PATH=/opt/conda/envs/prokka/bin:\$PATH" : "" + def terra_exit = params.terra ? 
"""PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """ : "" """ #adding python path for running busco on terra $terra diff --git a/modules/local/quast.nf b/modules/local/quast.nf index 59bf2726..92e3df1b 100755 --- a/modules/local/quast.nf +++ b/modules/local/quast.nf @@ -11,10 +11,6 @@ process QUAST { tuple val(meta), path('*.tsv') , emit: report_tsv path "versions.yml" , emit: versions - when: - //if the files are not corrupt and there are equal number of reads in each file then run bbduk - "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering." - script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/rename_fasta_headers.nf b/modules/local/rename_fasta_headers.nf index c59cac46..9239daaf 100644 --- a/modules/local/rename_fasta_headers.nf +++ b/modules/local/rename_fasta_headers.nf @@ -12,26 +12,23 @@ process RENAME_FASTA_HEADERS { path "versions.yml" , emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def prefix = task.ext.prefix ?: "${meta.id}" def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/rename_fasta_headers.py" : "rename_fasta_headers.py" """ gunzip --force ${assembled_scaffolds} unzipped=\$(basename ${assembled_scaffolds} .gz) #adding this in to allow alternative file names with -entry SCAFFOLDS --scaffolds_ext - ${ica}rename_fasta_headers.py --input \$unzipped --output ${prefix}.renamed.scaffolds.fa --name ${prefix} + ${script} --input \$unzipped --output ${prefix}.renamed.scaffolds.fa --name ${prefix} gzip --force ${prefix}.renamed.scaffolds.fa cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - rename_fasta_headers.py: \$(${ica}rename_fasta_headers.py --version ) + rename_fasta_headers.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/run_amrfinder.nf b/modules/local/run_amrfinder.nf index e6964722..4d41e09f 100755 --- a/modules/local/run_amrfinder.nf +++ b/modules/local/run_amrfinder.nf @@ -18,24 +18,13 @@ process AMRFINDERPLUS_RUN { task.ext.when == null || task.ext.when script: - // use --organism - if ( "${organism_param[0]}" != "No Match Found") { - organism = "--organism ${organism_param[0]}" - } else { organism = "" } - //set up for terra - if (params.terra==false) { - terra = "" - terra_exit = "" - } else if (params.terra==true) { - terra = "PATH=/opt/conda/envs/amrfinderplus/bin:\$PATH" - terra_exit = """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/amrfinderplus/bin:||')" """ - } else { - error "Please set params.terra to either \"true\" or \"false\"" - } // define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def container = task.container.toString() - "staphb/ncbi-amrfinderplus@" + def terra = params.terra ? "PATH=/opt/conda/envs/amrfinderplus/bin:\$PATH" : "" + def terra_exit = params.terra ? 
"""PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/amrfinderplus/bin:||')" """ : "" + def organism = "${organism_param[0]}" != "No Match Found" ? "--organism ${organism_param[0]}" : "" //get name of amrfinder database file db_name = db.toString() - '.tar.gz' """ diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 952934ae..bc25439b 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -12,22 +12,19 @@ process SAMPLESHEET_CHECK { path("versions.yml"), emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/check_samplesheet.py" : "check_samplesheet.py" """ - ${ica}check_samplesheet.py \\ + ${script} \\ $samplesheet \\ samplesheet.valid.csv cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - check_samplesheet.py: \$(${ica}check_samplesheet.py --version ) + check_samplesheet.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/scaffolds_samplesheet_check.nf b/modules/local/scaffolds_samplesheet_check.nf index cd701ae0..084bf95b 100644 --- a/modules/local/scaffolds_samplesheet_check.nf +++ b/modules/local/scaffolds_samplesheet_check.nf @@ -12,22 +12,19 @@ process SCAFFOLDS_SAMPLESHEET_CHECK { path "versions.yml", emit: versions script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/ - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "python ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def container_version = "base_v2.1.0" def container = task.container.toString() - "quay.io/jvhagey/phoenix@" + def script = params.ica ? 
"python ${params.ica_path}/check_assembly_samplesheet.py" : "check_assembly_samplesheet.py" """ - ${ica}check_assembly_samplesheet.py \\ + ${script} \\ $samplesheet \\ samplesheet.valid.csv cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - check_assembly_samplesheet.py: \$(${ica}check_assembly_samplesheet.py --version ) + check_assembly_samplesheet.py: \$(${script} --version ) phoenix_base_container_tag: ${container_version} phoenix_base_container: ${container} END_VERSIONS diff --git a/modules/local/spades.nf b/modules/local/spades.nf index e919591d..0f49c6e8 100755 --- a/modules/local/spades.nf +++ b/modules/local/spades.nf @@ -24,10 +24,6 @@ process SPADES { tuple val(meta), path("*_spades_outcome.csv") , emit: spades_outcome script: - // Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory. - if (params.ica==false) { ica = "" } - else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" } - else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." } // define variables def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -37,14 +33,17 @@ process SPADES { def phred_offset = params.phred def extended_qc_arg = extended_qc ? "-c" : "" def container = task.container.toString() - "staphb/spades@" + def script_stats = params.ica ? "bash ${params.ica_path}/pipeline_stats_writer_trimd.sh" : "pipeline_stats_writer_trimd.sh" + def script_before = params.ica ? "bash ${params.ica_path}/beforeSpades.sh" : "beforeSpades.sh" + def script_after = params.ica ? "bash ${params.ica_path}/afterSpades.sh" : "afterSpades.sh" """ # preemptively create _summary_line.csv and .synopsis file in case spades fails (no contigs or scaffolds created) we can still collect upstream stats. 
- ${ica}pipeline_stats_writer_trimd.sh -a ${fastp_raw_qc} -b ${fastp_total_qc} -c ${reads[0]} -d ${reads[1]} -e ${kraken2_trimd_report} -f ${k2_bh_summary} -g ${krona_trimd} - ${ica}beforeSpades.sh -k ${k2_bh_summary} -n ${prefix} -d ${full_outdir} ${extended_qc_arg} + ${script_stats} -a ${fastp_raw_qc} -b ${fastp_total_qc} -c ${reads[0]} -d ${reads[1]} -e ${kraken2_trimd_report} -f ${k2_bh_summary} -g ${krona_trimd} + ${script_before} -k ${k2_bh_summary} -n ${prefix} -d ${full_outdir} ${extended_qc_arg} #get version information - bspades_version=\$(${ica}beforeSpades.sh -V) - pipestats_version=\$(${ica}pipeline_stats_writer_trimd.sh -V) - aspades_version=\$(${ica}afterSpades.sh -V) + bspades_version=\$(${script_before} -V) + pipestats_version=\$(${script_stats} -V) + aspades_version=\$(${script_after} -V) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -93,6 +92,6 @@ process SPADES { #Create a summaryline file that will be deleted later if spades is successful if not this line shows up in the final Phoenix_output_summary file #create file '*_spades_outcome.csv' to state if spades fails, if contigs or scaffolds are created. See spades_failure.nf subworkflow #This file will determine if downstream process GENERATE_PIPELINE_STATS_FAILURE and CREATE_SUMMARY_LINE_FAILURE will run (if spades creates contigs, but not scaffolds). 
- ${ica}afterSpades.sh + ${script_after} """ } diff --git a/nextflow.config b/nextflow.config index 20e2a82f..bb7d2165 100755 --- a/nextflow.config +++ b/nextflow.config @@ -16,19 +16,31 @@ params { busco_db_path = null coverage = 30 // can only increase above 30 + // Run flags + run_busco = false + run_srst2_mlst = false + run_griphin = false + // Additional input parameters for -entry SCAFFOLDS and CDC_SCAFFOLDS indir = null scaffolds_ext = '.scaffolds.fa.gz' + extended_qc = false // Params for filtering minlength = 500 phred = 33 + save_trimmed_fail = true + save_merged = false + save_output_fastqs = true + save_reads_assignment = true // Additional input parameters for -entry SRA and CDC_SRA input_sra = null use_sra = false + asmbld = false // For NCBI spreadsheet creation + ncbi_excel_creation = false microbe_example = "${baseDir}/assets/Microbe.1.0_Example_Data.xlsx" sra_metadata = "${baseDir}/assets/SRA_metadata_example.xlsx" osii_bioprojects = "${baseDir}/assets/osii-bioprojects.yaml" @@ -37,6 +49,7 @@ params { // Terra and ICA specific options terra = false ica = false + ica_path = "${launchDir}/bin/" // Database paths //path2db = "${baseDir}/assets/databases" // need this for kraken2db if you place those files in the assets folder diff --git a/subworkflows/local/kraken2krona.nf b/subworkflows/local/kraken2krona.nf index c89fac81..40e1c5c1 100755 --- a/subworkflows/local/kraken2krona.nf +++ b/subworkflows/local/kraken2krona.nf @@ -2,21 +2,12 @@ // Subworkflow: run Kraken2 // -include { KRAKEN2_KRAKEN2 as KRAKEN2_TRIMD } from '../../modules/local/kraken2' -include { KRAKEN2_KRAKEN2 as KRAKEN2_ASMBLD } from '../../modules/local/kraken2' -include { KRAKEN2_KRAKEN2 as KRAKEN2_WTASMBLD } from '../../modules/local/kraken2' -include { KRAKEN2_KRONA as KREPORT2KRONA_TRIMD } from '../../modules/local/krakentools_kreport2krona' -include { KRAKEN2_KRONA as KREPORT2KRONA_ASMBLD } from '../../modules/local/krakentools_kreport2krona' -include { KRAKEN2_KRONA as 
KREPORT2KRONA_WTASMBLD } from '../../modules/local/krakentools_kreport2krona' -include { KRONA_KTIMPORTTEXT as KRONA_KTIMPORTTEXT_TRIMD } from '../../modules/local/ktimporttext' -include { KRONA_KTIMPORTTEXT as KRONA_KTIMPORTTEXT_ASMBLD } from '../../modules/local/ktimporttext' -include { KRONA_KTIMPORTTEXT as KRONA_KTIMPORTTEXT_WTASMBLD } from '../../modules/local/ktimporttext' -include { KRAKENTOOLS_KREPORT2MPA as KREPORT2MPA_TRIMD } from '../../modules/local/krakentools_kreport2mpa' -include { KRAKENTOOLS_KREPORT2MPA as KREPORT2MPA_ASMBLD } from '../../modules/local/krakentools_kreport2mpa' -include { KRAKENTOOLS_MAKEKREPORT } from '../../modules/local/krakentools_makekreport' -include { KRAKEN_BEST_HIT as KRAKEN2_BH_TRIMD } from '../../modules/local/kraken_bh' -include { KRAKEN_BEST_HIT as KRAKEN2_BH_ASMBLD } from '../../modules/local/kraken_bh' -include { KRAKEN_BEST_HIT as KRAKEN2_BH_WTASMBLD } from '../../modules/local/kraken_bh' +include { KRAKEN2_KRAKEN2 } from '../../modules/local/kraken2' +include { KRAKEN2_KRONA } from '../../modules/local/krakentools_kreport2krona' +include { KRONA_KTIMPORTTEXT } from '../../modules/local/ktimporttext' +include { KRAKENTOOLS_KREPORT2MPA } from '../../modules/local/krakentools_kreport2mpa' +include { KRAKENTOOLS_MAKEKREPORT } from '../../modules/local/krakentools_makekreport' +include { KRAKEN_BEST_HIT } from '../../modules/local/kraken_bh' workflow KRAKEN2_WF { take: @@ -46,95 +37,27 @@ workflow KRAKEN2_WF { } } - if(type =="trimd") { + // Checking for Contamination in trimmed reads + KRAKEN2_KRAKEN2 ( + fasta_ch, type, params.save_output_fastqs, params.save_reads_assignment + ) + ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions) - // Checking for Contamination in trimmed reads - KRAKEN2_TRIMD ( - fasta_ch, "trimd", true, true - ) - ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) - - // Create mpa file - KREPORT2MPA_TRIMD ( - KRAKEN2_TRIMD.out.report - ) - ch_versions = 
ch_versions.mix(KREPORT2MPA_TRIMD.out.versions) - - // Converting kraken report to krona file to have hierarchical output in krona plot - KREPORT2KRONA_TRIMD ( - KRAKEN2_TRIMD.out.report, "trimd" - ) - ch_versions = ch_versions.mix(KREPORT2KRONA_TRIMD.out.versions) - - // Create krona plot from kraken report - KRONA_KTIMPORTTEXT_TRIMD ( - KREPORT2KRONA_TRIMD.out.krona, "trimd" - ) - ch_versions = ch_versions.mix(KRONA_KTIMPORTTEXT_TRIMD.out.versions) - - // Combining kraken report with quast report based on meta.id - kraken_bh_trimd_ch = KRAKEN2_TRIMD.out.report.map{meta, report -> [[id:meta.id], report]}\ - .join(qc_stats.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0]) - - // Getting Kraken best hit for assembled data - KRAKEN2_BH_TRIMD ( - kraken_bh_trimd_ch, "trimd" - ) - ch_versions = ch_versions.mix(KRAKEN2_BH_TRIMD.out.versions) - - report = KRAKEN2_TRIMD.out.report - k2_bh_summary = KRAKEN2_BH_TRIMD.out.ksummary - krona_html = KRONA_KTIMPORTTEXT_TRIMD.out.html - - } else if(type =="asmbld") { - - // Checking for Contamination in scaffolds - KRAKEN2_ASMBLD ( - fasta_ch, "asmbld", true, true - ) - ch_versions = ch_versions.mix(KRAKEN2_ASMBLD.out.versions) - - // Create mpa file - KREPORT2MPA_ASMBLD ( - KRAKEN2_ASMBLD.out.report - ) - ch_versions = ch_versions.mix(KREPORT2MPA_ASMBLD.out.versions) + // Create mpa file + KRAKENTOOLS_KREPORT2MPA ( + KRAKEN2_KRAKEN2.out.report + ) + ch_versions = ch_versions.mix(KRAKENTOOLS_KREPORT2MPA.out.versions) + if (type == "trimd" || type == "asmbld"){ // Converting kraken report to krona file to have hierarchical output in krona plot - KREPORT2KRONA_ASMBLD ( - KRAKEN2_ASMBLD.out.report, "asmbld" - ) - ch_versions = ch_versions.mix(KREPORT2KRONA_ASMBLD.out.versions) - - // Create krona plot from kraken report - KRONA_KTIMPORTTEXT_ASMBLD ( - KREPORT2KRONA_ASMBLD.out.krona, "asmbld" + KRAKEN2_KRONA ( + KRAKEN2_KRAKEN2.out.report, type ) - ch_versions = 
ch_versions.mix(KRONA_KTIMPORTTEXT_ASMBLD.out.versions) - - kraken_bh_asmbld_ch = KRAKEN2_ASMBLD.out.report.map{meta, report -> [[id:meta.id], report]}\ - .join(quast.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) - - // Getting Kraken best hit for assembled data - KRAKEN2_BH_ASMBLD ( - kraken_bh_asmbld_ch, "asmbld" - ) - ch_versions = ch_versions.mix(KRAKEN2_BH_ASMBLD.out.versions) - - report = KRAKEN2_ASMBLD.out.report - k2_bh_summary = KRAKEN2_BH_ASMBLD.out.ksummary - krona_html = KRONA_KTIMPORTTEXT_ASMBLD.out.html - - } else if(type=="wtasmbld") { - - // Getting species ID as back up for FastANI and checking contamination isn't in assembly - KRAKEN2_WTASMBLD ( - fasta_ch, "wtasmbld", true, true - ) - ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions) - + report = KRAKEN2_KRAKEN2.out.report + } else if (type == "wtasmbld"){ // Add in krakendb into the kraken reads channel so each fasta has a krakendb to go with it. - make_report_ch = KRAKEN2_WTASMBLD.out.classified_reads_assignment.combine(kraken2_db_path) + make_report_ch = KRAKEN2_KRAKEN2.out.classified_reads_assignment.combine(kraken2_db_path) // Create weighted kraken report based on scaffold length KRAKENTOOLS_MAKEKREPORT ( @@ -143,38 +66,44 @@ workflow KRAKEN2_WF { ch_versions = ch_versions.mix(KRAKENTOOLS_MAKEKREPORT.out.versions) // Converting kraken report to krona file to have hierarchical output in krona plot - KREPORT2KRONA_WTASMBLD ( + KRAKEN2_KRONA ( KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report, "wtasmbld" ) - ch_versions = ch_versions.mix(KREPORT2KRONA_WTASMBLD.out.versions) + + report = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report + } + ch_versions = ch_versions.mix(KRAKEN2_KRONA.out.versions) + + + // Create krona plot from kraken report + KRONA_KTIMPORTTEXT ( + KRAKEN2_KRONA.out.krona, type + ) + ch_versions = ch_versions.mix(KRONA_KTIMPORTTEXT.out.versions) + if (type == "trimd"){ // Combining kraken report with quast report based on meta.id - 
kraken_bh_wtasmbld_ch = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report.map{meta, kraken_weighted_report -> [[id:meta.id], kraken_weighted_report]}\ + kraken_bh_ch = KRAKEN2_KRAKEN2.out.report.map{meta, report -> [[id:meta.id], report]}\ + .join(qc_stats.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0]) + } else if (type == "asmbld"){ + // Combining kraken report with quast report based on meta.id + kraken_bh_ch = KRAKEN2_KRAKEN2.out.report.map{meta, report -> [[id:meta.id], report]}\ + .join(quast.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) + } else if (type == "wtasmbld"){ + // Combining kraken report with quast report based on meta.id + kraken_bh_ch = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report.map{meta, kraken_weighted_report -> [[id:meta.id], kraken_weighted_report]}\ .join(quast.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0]) - - // Getting Kraken best hit for assembled data - KRAKEN2_BH_WTASMBLD ( - kraken_bh_wtasmbld_ch, "wtasmbld" - ) - ch_versions = ch_versions.mix(KRAKEN2_BH_WTASMBLD.out.versions) - - KRONA_KTIMPORTTEXT_WTASMBLD ( - KREPORT2KRONA_WTASMBLD.out.krona, "wtasmbld" - ) - ch_versions = ch_versions.mix(KRONA_KTIMPORTTEXT_WTASMBLD.out.versions) - - report = KRAKENTOOLS_MAKEKREPORT.out.kraken_weighted_report - k2_bh_summary = KRAKEN2_BH_WTASMBLD.out.ksummary - krona_html = KRONA_KTIMPORTTEXT_WTASMBLD.out.html - - } else { - println("Type options are: wtasmbld, asmbld or trimd") } + + // Getting Kraken best hit for assembled data + KRAKEN_BEST_HIT ( + kraken_bh_ch, type + ) + ch_versions = ch_versions.mix(KRAKEN_BEST_HIT.out.versions) emit: report = report - k2_bh_summary = k2_bh_summary - krona_html = krona_html + k2_bh_summary = KRAKEN_BEST_HIT.out.ksummary + krona_html = KRONA_KTIMPORTTEXT.out.html versions = ch_versions // channel: [ versions.yml ] - } \ No newline at end of file diff --git a/subworkflows/local/spades_failure.nf b/subworkflows/local/spades_failure.nf index 
878b73bd..c5a70acc 100755 --- a/subworkflows/local/spades_failure.nf +++ b/subworkflows/local/spades_failure.nf @@ -85,7 +85,8 @@ workflow SPADES_WF { // Combining weighted kraken report with the FastANI hit based on meta.id best_hit_ch = k2_bh_summary.map{ meta, ksummary -> [[id:meta.id], ksummary]}\ .join(SPADES.out.spades_outcome.splitCsv(strip:true).map{meta, spades_outcome -> [[id:meta.id], spades_outcome]}) - + .filter { it[2][0].contains('run_failure') || it[2][1].contains('no_scaffolds') || it[2][2].contains('no_contigs')} + // Getting ID from either FastANI or if fails, from Kraken2 DETERMINE_TAXA_ID_FAILURE ( best_hit_ch, params.nodes, params.names diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf index 42fee2c8..a4b3feeb 100644 --- a/workflows/phoenix.nf +++ b/workflows/phoenix.nf @@ -113,7 +113,7 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - //unzip any zipped databases + // unzip any zipped databases ASSET_CHECK ( params.zipped_sketch, params.custom_mlstdb, kraken2_db_path ) @@ -121,23 +121,29 @@ workflow PHOENIX_EXTERNAL { //fairy compressed file corruption check & generate read stats CORRUPTION_CHECK ( - INPUT_CHECK.out.reads, false // true says busco is being run in this workflow + INPUT_CHECK.out.reads, params.run_busco ) ch_versions = ch_versions.mix(CORRUPTION_CHECK.out.versions) //Combining reads with output of corruption check. 
By=2 is for getting R1 and R2 results //The mapping here is just to get things in the right bracket so we can call var[0] - read_stats_ch = INPUT_CHECK.out.reads.join(CORRUPTION_CHECK.out.outcome_to_edit, by: [0,0]) - .join(CORRUPTION_CHECK.out.outcome.splitCsv(strip:true, by:2).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0]]]}, by: [0,0]) + read_stats_ch = INPUT_CHECK.out.reads + .join(CORRUPTION_CHECK.out.outcome, by: [0,0]) + .join(CORRUPTION_CHECK.out.outcome.splitCsv(strip:true, by:2) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0]]]}, by: [0,0]) + .filter { it[3].findAll {!it.contains('FAILED')}} - //Get stats on raw reads if the reads aren't corrupted + // Get stats on raw reads if the reads aren't corrupted GET_RAW_STATS ( - read_stats_ch, false // false says no busco is being run + read_stats_ch, params.run_busco // false says no busco is being run ) ch_versions = ch_versions.mix(GET_RAW_STATS.out.versions) // Combining reads with output of corruption check - bbduk_ch = INPUT_CHECK.out.reads.join(GET_RAW_STATS.out.outcome.splitCsv(strip:true, by:3).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0]]]}, by: [0,0]) + bbduk_ch = INPUT_CHECK.out.reads + .join(GET_RAW_STATS.out.outcome.splitCsv(strip:true, by:3) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0]]]}, by: [0,0]) + .filter { it[2].findAll {!it.contains('FAILED')}} // Remove PhiX reads BBDUK ( @@ -147,7 +153,7 @@ workflow PHOENIX_EXTERNAL { // Trim and remove low quality reads FASTP_TRIMD ( - BBDUK.out.reads, true, false + BBDUK.out.reads, params.save_trimmed_fail, params.save_merged ) ch_versions = ch_versions.mix(FASTP_TRIMD.out.versions) @@ -160,16 +166,19 @@ workflow PHOENIX_EXTERNAL { // Combining fastp json outputs based on meta.id fastp_json_ch = FASTP_TRIMD.out.json.join(FASTP_SINGLES.out.json, by: [0,0])\ 
.join(GET_RAW_STATS.out.combined_raw_stats, by: [0,0])\ - .join(GET_RAW_STATS.out.outcome_to_edit, by: [0,0]) + .join(GET_RAW_STATS.out.outcome, by: [0,0]) // Script gathers data from fastp jsons for pipeline stats file GET_TRIMD_STATS ( - fastp_json_ch, false // false says no busco is being run + fastp_json_ch, params.run_busco // false says no busco is being run ) ch_versions = ch_versions.mix(GET_TRIMD_STATS.out.versions) // combing fastp_trimd information with fairy check of reads to confirm there are reads after filtering - trimd_reads_file_integrity_ch = FASTP_TRIMD.out.reads.join(GET_TRIMD_STATS.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0,0]) + trimd_reads_file_integrity_ch = FASTP_TRIMD.out.reads + .join(GET_TRIMD_STATS.out.outcome.splitCsv(strip:true, by:5) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0,0]) + .filter { it[2].findAll {!it.contains('FAILED')}} // Running Fastqc on trimmed reads FASTQCTRIMD ( @@ -192,7 +201,7 @@ workflow PHOENIX_EXTERNAL { KRAKEN2_TRIMD.out.report, \ KRAKEN2_TRIMD.out.krona_html, \ KRAKEN2_TRIMD.out.k2_bh_summary, \ - false + params.extended_qc ) ch_versions = ch_versions.mix(SPADES_WF.out.versions) @@ -209,24 +218,26 @@ workflow PHOENIX_EXTERNAL { ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions) // Combine bbmap log with the fairy outcome file - scaffold_check_ch = BBMAP_REFORMAT.out.log.map{meta, log -> [[id:meta.id], log]}\ - .join(GET_TRIMD_STATS.out.outcome_to_edit.map{ meta, outcome_to_edit -> [[id:meta.id], outcome_to_edit]}, by: [0])\ - .join(GET_RAW_STATS.out.combined_raw_stats.map{meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\ - .join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, 
by: [0])\ - .join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ - .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ - .join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0]) + scaffold_check_ch = BBMAP_REFORMAT.out.log.map{ meta, log -> [[id:meta.id], log]}\ + .join(GET_TRIMD_STATS.out.outcome.map{ meta, outcome -> [[id:meta.id], outcome]}, by: [0])\ + .join(GET_RAW_STATS.out.combined_raw_stats.map{ meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\ + .join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0]) // Checking that there are still scaffolds left after filtering SCAFFOLD_COUNT_CHECK ( - scaffold_check_ch, false, params.coverage, params.nodes, params.names + scaffold_check_ch, params.extended_qc, params.coverage, params.nodes, params.names ) ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions) //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]} - .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0]) - + .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5) + .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], 
fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0]) + .filter { it[2].findAll {it.contains('PASSED: More than 0 scaffolds')}} + // Running gamma to identify hypervirulence genes in scaffolds GAMMA_HV ( filtered_scaffolds_ch, params.hvgamdb @@ -250,6 +261,12 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(QUAST.out.versions) + // get gff and protein files for amrfinder+ + PROKKA ( + filtered_scaffolds_ch, [], [] + ) + ch_versions = ch_versions.mix(PROKKA.out.versions) + // Creating krona plots and best hit files for weighted assembly KRAKEN2_WTASMBLD ( BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads" @@ -309,16 +326,10 @@ workflow PHOENIX_EXTERNAL { FASTP_TRIMD.out.reads, \ DETERMINE_TAXA_ID.out.taxonomy, \ ASSET_CHECK.out.mlst_db, \ - false + params.run_srst2_mlst ) ch_versions = ch_versions.mix(DO_MLST.out.versions) - // get gff and protein files for amrfinder+ - PROKKA ( - filtered_scaffolds_ch, [], [] - ) - ch_versions = ch_versions.mix(PROKKA.out.versions) - /*// Fetch AMRFinder Database AMRFINDERPLUS_UPDATE( ) ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/ @@ -351,10 +362,26 @@ workflow PHOENIX_EXTERNAL { ) ch_versions = ch_versions.mix(CALCULATE_ASSEMBLY_RATIO.out.versions) + // prepare inputs to the stats wf + if (params.run_srst2_mlst){ + fullgene_results=SRST2_TRIMD_AR.out.fullgene_results + } else { + fullgene_results=[] + } + if (params.asmbld){ + asmbld_report=KRAKEN2_ASMBLD.out.report // channel: tuple (meta) path(report) + asmbld_krona_html=KRAKEN2_ASMBLD.out.krona_html // channel: tuple (meta) path(krona_html) + asmbld_k2_bh_summary=KRAKEN2_ASMBLD.out.k2_bh_summary // channel: tuple (meta) path(k2_bh_summary) + } else{ + asmbld_report=[] + asmbld_krona_html=[] + asmbld_k2_bh_summary=[] + } + GENERATE_PIPELINE_STATS_WF ( GET_RAW_STATS.out.combined_raw_stats, \ GET_TRIMD_STATS.out.fastp_total_qc, \ - [], \ + 
fullgene_results, \ KRAKEN2_TRIMD.out.report, \ KRAKEN2_TRIMD.out.krona_html, \ KRAKEN2_TRIMD.out.k2_bh_summary, \ @@ -365,7 +392,7 @@ workflow PHOENIX_EXTERNAL { GAMMA_AR.out.gamma, \ GAMMA_PF.out.gamma, \ QUAST.out.report_tsv, \ - [], [], [], [], \ + params.run_busco, asmbld_report, asmbld_krona_html, asmbld_k2_bh_summary, \ KRAKEN2_WTASMBLD.out.report, \ KRAKEN2_WTASMBLD.out.krona_html, \ KRAKEN2_WTASMBLD.out.k2_bh_summary, \ @@ -374,9 +401,9 @@ workflow PHOENIX_EXTERNAL { CALCULATE_ASSEMBLY_RATIO.out.ratio, \ AMRFINDERPLUS_RUN.out.mutation_report, \ CALCULATE_ASSEMBLY_RATIO.out.gc_content, \ - false + params.extended_qc ) - ch_versions = ch_versions.mix(GENERATE_PIPELINE_STATS_WF.out.versions) + ch_versions = ch_versions.mix(GENERATE_PIPELINE_STATS_WF.out.versions) // Combining output based on meta.id to create summary by sample -- is this verbose, ugly and annoying? yes, if anyone has a slicker way to do this we welcome the input. line_summary_ch = GET_TRIMD_STATS.out.fastp_total_qc.map{meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}\ @@ -412,66 +439,79 @@ workflow PHOENIX_EXTERNAL { // combine all line summaries into one channel spades_failure_summaries_ch = FETCH_FAILED_SUMMARIES.out.spades_failure_summary_line fairy_summary_ch = CORRUPTION_CHECK.out.summary_line.collect().ifEmpty( [] )\ - .combine(GET_RAW_STATS.out.summary_line.collect().ifEmpty( [] ))\ - .combine(GET_TRIMD_STATS.out.summary_line.collect().ifEmpty( [] ))\ - .combine(SCAFFOLD_COUNT_CHECK.out.summary_line.collect().ifEmpty( [] ))\ - .ifEmpty( [] ) + .combine(GET_RAW_STATS.out.summary_line.collect().ifEmpty( [] ))\ + .combine(GET_TRIMD_STATS.out.summary_line.collect().ifEmpty( [] ))\ + .combine(SCAFFOLD_COUNT_CHECK.out.summary_line.collect().ifEmpty( [] ))\ + .ifEmpty( [] ) // pulling it all together - all_summaries_ch = spades_failure_summaries_ch.combine(failed_summaries_ch).combine(summaries_ch).combine(fairy_summary_ch) - - // Combining sample summaries into final report - 
GATHER_SUMMARY_LINES ( - all_summaries_ch, outdir_path, false - ) - ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions) - - //create GRiPHin report - GRIPHIN ( - all_summaries_ch, INPUT_CHECK.out.valid_samplesheet, params.ardb, outdir_path, params.coverage, true, false - ) - ch_versions = ch_versions.mix(GRIPHIN.out.versions) - - if (ncbi_excel_creation == true && params.create_ncbi_sheet == true) { - // requiring files so that this process doesn't start until needed files are made. - required_files_ch = FASTP_TRIMD.out.reads.map{ meta, reads -> reads[0]}.collect().combine(DO_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> checked_MLSTs}.collect()).combine(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> taxonomy}.collect()) - - //Fill out NCBI excel sheets for upload based on what PHX found - CREATE_NCBI_UPLOAD_SHEET ( - required_files_ch, params.microbe_example, params.sra_metadata, params.osii_bioprojects, outdir_path, GRIPHIN.out.griphin_tsv_report - ) - ch_versions = ch_versions.mix(CREATE_NCBI_UPLOAD_SHEET.out.versions) - } - - // Collecting the software versions - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowPhoenix.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQCTRIMD.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FASTP_TRIMD.out.json.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = 
ch_multiqc_files.mix(FASTP_SINGLES.out.json.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(BBDUK.out.log.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.report_tsv.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_TRIMD.out.report.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_WTASMBLD.out.report.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect() - ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) + all_summaries_ch = spades_failure_summaries_ch + .combine(failed_summaries_ch) + .combine(summaries_ch) + .combine(fairy_summary_ch) + + // // Combining sample summaries into final report + // GATHER_SUMMARY_LINES ( + // all_summaries_ch, params.run_busco + // ) + // ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions) + + // //create GRiPHin report + // if(params.run_griphin) { + // GRIPHIN ( + // all_summaries_ch, INPUT_CHECK.out.valid_samplesheet, params.ardb, outdir_path, params.coverage, true, false + // ) + // ch_versions = ch_versions.mix(GRIPHIN.out.versions) + // } + + // // Create NCBI sheet + // if (params.ncbi_excel_creation == true && params.create_ncbi_sheet == true) { + // // requiring files so that this process doesn't start until needed files are made. 
+ // required_files_ch = FASTP_TRIMD.out.reads + // .map{ meta, reads -> reads[0]} + // .collect() + // .combine(DO_MLST.out.checked_MLSTs + // .map{ meta, checked_MLSTs -> checked_MLSTs} + // .collect()) + // .combine(DETERMINE_TAXA_ID.out.taxonomy + // .map{ meta, taxonomy -> taxonomy}.collect()) + + // //Fill out NCBI excel sheets for upload based on what PHX found + // CREATE_NCBI_UPLOAD_SHEET ( + // required_files_ch, params.microbe_example, params.sra_metadata, params.osii_bioprojects, outdir_path, GRIPHIN.out.griphin_tsv_report + // ) + // ch_versions = ch_versions.mix(CREATE_NCBI_UPLOAD_SHEET.out.versions) + // } + + // // Collecting the software versions + // CUSTOM_DUMPSOFTWAREVERSIONS ( + // ch_versions.unique().collectFile(name: 'collated_versions.yml') + // ) + + // // + // // MODULE: MultiQC + // // + // workflow_summary = WorkflowPhoenix.paramsSummaryMultiqc(workflow, summary_params) + // ch_workflow_summary = Channel.value(workflow_summary) + + // ch_multiqc_files = Channel.empty() + // ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) + // ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + // ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + // ch_multiqc_files = ch_multiqc_files.mix(FASTQCTRIMD.out.zip.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(FASTP_TRIMD.out.json.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(FASTP_SINGLES.out.json.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(BBDUK.out.log.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.report_tsv.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_TRIMD.out.report.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = 
ch_multiqc_files.mix(KRAKEN2_WTASMBLD.out.report.collect{it[1]}.ifEmpty([])) + + // MULTIQC ( + // ch_multiqc_files.collect() + // ) + // multiqc_report = MULTIQC.out.report.toList() + // ch_versions = ch_versions.mix(MULTIQC.out.versions) emit: scaffolds = BBMAP_REFORMAT.out.filtered_scaffolds @@ -479,11 +519,11 @@ workflow PHOENIX_EXTERNAL { mlst = DO_MLST.out.checked_MLSTs amrfinder_output = AMRFINDERPLUS_RUN.out.report gamma_ar = GAMMA_AR.out.gamma - phx_summary = GATHER_SUMMARY_LINES.out.summary_report + phx_summary = GATHER_SUMMARY_LINES.out.summary_report //output for phylophoenix - griphin_tsv = GRIPHIN.out.griphin_report - griphin_excel = GRIPHIN.out.griphin_tsv_report - dir_samplesheet = GRIPHIN.out.converted_samplesheet + griphin_tsv = params.run_griphin ? GRIPHIN.out.griphin_report : null + griphin_excel = params.run_griphin ? GRIPHIN.out.griphin_tsv_report : null + dir_samplesheet = params.run_griphin ? GRIPHIN.out.converted_samplesheet : null //output for ncbi upload ncbi_sra_sheet = params.create_ncbi_sheet ? CREATE_NCBI_UPLOAD_SHEET.out.ncbi_sra : null ncbi_biosample_sheet = params.create_ncbi_sheet ? CREATE_NCBI_UPLOAD_SHEET.out.ncbi_biosample : null