From 6d0ab8b6a96de53e9c3cc0df086048f59a56f512 Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 3 Jun 2024 17:01:27 -0300 Subject: [PATCH 1/6] feat: Add the download entry to the pipeline Signed-off-by: jvfe --- conf/base.config | 12 +++---- conf/modules.config | 31 ++++++++++++++++++ conf/test.config | 7 ++++ main.nf | 5 +++ modules/local/download/main.nf | 25 ++++++++++++++ nextflow.config | 12 +++++++ nextflow_schema.json | 60 ++++++++++++++++++++++++++++++++-- workflows/download.nf | 45 +++++++++++++++++++++++++ 8 files changed, 187 insertions(+), 10 deletions(-) create mode 100644 modules/local/download/main.nf create mode 100644 workflows/download.nf diff --git a/conf/base.config b/conf/base.config index 0158901..d62b5e1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -19,13 +18,6 @@ process { maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } @@ -59,6 +51,10 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withLabel:error_retry_delay { + errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + maxRetries = 3 + } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/conf/modules.config b/conf/modules.config index 6bf4697..be1fe2d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,37 @@ process { ] } + // DOWNLOAD ENTRY + withName: DOWNLOAD_FUNCTIONAL_DB { + publishDir = [ + path: { "${params.outdir}/references/functional" }, + mode: "move", + ] + } + withName: DOWNLOAD_FUNCTIONAL_DICT { + publishDir = [ + path: { "${params.outdir}/references/functional" }, + mode: "move", + ] + } + withName: DOWNLOAD_KAIJU { + publishDir = [ + path: { "${params.outdir}/references/kaiju" }, + mode: "move", + ] + } + withName: DOWNLOAD_KRAKEN { + publishDir = [ + path: { "${params.outdir}/references/kraken2" }, + mode: "move", + ] + } + withName: DOWNLOAD_HOST { + publishDir = [ + path: { "${params.outdir}/references/host" }, + mode: "move", + ] + } // Host removal withName: BOWTIE2_ALIGN { ext.args = "--sensitive" diff --git a/conf/test.config b/conf/test.config index c2ffbe1..dccd906 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,6 +26,13 @@ params { id_mapping = "$projectDir/test_data/idmapping_selected.tab.example.gz" reference_fasta = "$projectDir/test_data/protein.faa.gz" + // download entry + functional_db = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/protein.faa.gz' + functional_dictionary = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/idmapping_selected.tab.example.gz' + kaiju_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kaiju/kaiju.tar.gz' + kraken2_db_url = 
'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' + host_url = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/GCA_002596845.1_ASM259684v1_genomic.fna.gz' + // Annotation params minimum_bitscore = 30 minimum_pident = 30 diff --git a/main.nf b/main.nf index 13ac8fd..7419278 100644 --- a/main.nf +++ b/main.nf @@ -32,6 +32,7 @@ WorkflowMain.initialise(workflow, params, log) */ include { EURYALE } from './workflows/euryale' +include { DOWNLOAD } from './workflows/download' // // WORKFLOW: Run main dalmolingroup/euryale analysis pipeline @@ -40,6 +41,10 @@ workflow DALMOLINGROUP_EURYALE { EURYALE () } +workflow download { + DOWNLOAD () +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS diff --git a/modules/local/download/main.nf b/modules/local/download/main.nf new file mode 100644 index 0000000..6f12b08 --- /dev/null +++ b/modules/local/download/main.nf @@ -0,0 +1,25 @@ +process DOWNLOAD { + tag "$id" + + label 'process_single' + label 'error_retry_delay' + + input: + val id + val url + + output: + path "${prefix}", emit: db + + script: + prefix = task.ext.prefix ?: "${id}" + + """ + wget -O ${prefix} $url + """ + + stub: + """ + touch ${prefix} + """ +} diff --git a/nextflow.config b/nextflow.config index eb9dcf7..d98ac4c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,6 +50,18 @@ params { skip_alignment = false skip_microview = false + // Dowload entry options + download_functional = true + download_kaiju = true + download_kraken = false + download_host = false + + functional_db = 'https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz' + functional_dictionary = 'https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz' + kaiju_db_url = 'https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz' + kraken2_db_url = 
'https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz' + host_url = 'http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' + // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 35e15f4..7183bc6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -237,6 +237,58 @@ } } }, + "download_entry": { + "title": "Download Entry", + "type": "object", + "description": "", + "default": "", + "properties": { + "download_functional": { + "type": "boolean", + "default": true, + "description": "Whether to download functional references" + }, + "download_kaiju": { + "type": "boolean", + "default": true, + "description": "Whether to download the Kaiju reference db" + }, + "download_kraken": { + "type": "boolean", + "description": "Whether to download the Kraken2 reference db" + }, + "download_host": { + "type": "boolean", + "description": "Whether to download the host reference genome" + }, + "functional_db": { + "type": "string", + "default": "https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz", + "description": "Functional reference URL (download entry)" + }, + "functional_dictionary": { + "type": "string", + "default": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz", + "description": "Functional dictionary URL (download entry)" + }, + "kaiju_db_url": { + "type": "string", + "default": "https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz", + "description": "Kaiju reference URL (download entry)" + }, + "kraken2_db_url": { + "type": "string", + "default": "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz", + "description": "Kraken2 reference URL (download entry)" + }, + "host_url": { + "type": "string", + "default": 
"http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz", + "description": "Host FASTA reference URL (download entry)" + } + }, + "fa_icon": "fas fa-database" + }, "max_job_request_options": { "title": "Max job request options", "type": "object", @@ -352,7 +404,8 @@ "type": "string", "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", - "hidden": true + "hidden": true, + "default": "/home/jvfe/dev/pesquisa/dalmolingroup-euryale/assets/euryale_logo.png" }, "multiqc_methods_description": { "type": "string", @@ -362,7 +415,7 @@ "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", + "default": "null/pipeline_info", "fa_icon": "fas fa-cogs", "hidden": true }, @@ -413,6 +466,9 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/download_entry" + }, { "$ref": "#/definitions/max_job_request_options" }, diff --git a/workflows/download.nf b/workflows/download.nf new file mode 100644 index 0000000..083470d --- /dev/null +++ b/workflows/download.nf @@ -0,0 +1,45 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowEuryale.initialise(params, log) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULES +// + +include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DB } from '../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DICT } from 
'../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_KAIJU } from '../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_KRAKEN } from '../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_HOST } from '../modules/local/download/main' + +workflow DOWNLOAD { + if (params.download_functional) { + DOWNLOAD_FUNCTIONAL_DB("reference_fasta.fa.gz", params.functional_db) + DOWNLOAD_FUNCTIONAL_DICT("id_mapping.tab.gz", params.functional_dictionary) + } + + if (params.download_kaiju) { + DOWNLOAD_KAIJU("kaiju_db.tar.gz", params.kaiju_db_url) + } + + if (params.download_kraken) { + DOWNLOAD_KRAKEN("kraken2_db.tar.gz", params.kraken2_db_url) + } + + if (params.download_host) { + DOWNLOAD_HOST("host_fasta.fa.gz", params.host_url) + } +} From 1e55398de3ee5a8f919bd33697e3749ab781d4fd Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 3 Jun 2024 17:02:54 -0300 Subject: [PATCH 2/6] docs: Add params for the download entry Signed-off-by: jvfe --- docs/params.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/params.md b/docs/params.md index a4e6f84..f27a5e2 100644 --- a/docs/params.md +++ b/docs/params.md @@ -87,6 +87,22 @@ Reference genome related files and options required for the workflow. | `igenomes_ignore` | Do not load the iGenomes reference config.
HelpDo not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.
| `boolean` | | | True | | `fasta` | | `string` | | | | +## Download Entry + + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `download_functional` | Whether to download functional references | `boolean` | True | | | +| `download_kaiju` | Whether to download the Kaiju reference db | `boolean` | True | | | +| `download_kraken` | Whether to download the Kraken2 reference db | `boolean` | | | | +| `download_host` | Whether to download the host reference genome | `boolean` | | | | +| `functional_db` | Functional reference URL (download entry) | `string` | https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz | | | +| `functional_dictionary` | Functional dictionary URL (download entry) | `string` | https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz | | | +| `kaiju_db_url` | Kaiju reference URL (download entry) | `string` | https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz | | | +| `kraken2_db_url` | Kraken2 reference URL (download entry) | `string` | https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz | | | +| `host_url` | Host FASTA reference URL (download entry) | `string` | http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz | | | + ## Max job request options Set the top limit for requested resources for any single job. 
From a28ecf8f51757676c9ff568e6cc35139a464c5fb Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 3 Jun 2024 18:04:14 -0300 Subject: [PATCH 3/6] fix: Remove requirement for input and kaiju_db Signed-off-by: jvfe --- nextflow_schema.json | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7183bc6..c8f1b68 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,7 +11,6 @@ "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", "required": [ - "input", "outdir" ], "properties": { @@ -154,9 +153,6 @@ "description": "Run Kraken2 classifier" } }, - "required": [ - "kaiju_db" - ], "fa_icon": "fab fa-pagelines" }, "functional": { @@ -404,8 +400,7 @@ "type": "string", "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", - "hidden": true, - "default": "/home/jvfe/dev/pesquisa/dalmolingroup-euryale/assets/euryale_logo.png" + "hidden": true }, "multiqc_methods_description": { "type": "string", @@ -415,7 +410,7 @@ "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "null/pipeline_info", + "default": "${params.outdir}/pipeline_info", "fa_icon": "fas fa-cogs", "hidden": true }, From 7bac00f3b5e79301cfe10b1ae4f4d6cd8e97b7be Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 3 Jun 2024 18:08:14 -0300 Subject: [PATCH 4/6] fix: Remove check for input in workflowmain Signed-off-by: jvfe --- lib/WorkflowMain.groovy | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 5bd1677..3bdbbf4 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -78,12 +78,6 @@ class WorkflowMain { // Check AWS batch settings NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide 
an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } } // // Get attribute from genome config file e.g. fasta From 0e2e15d63b762dd7d44fd180de77cef21bff43ca Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 3 Jun 2024 18:13:40 -0300 Subject: [PATCH 5/6] fix: Declare euryale after download Signed-off-by: jvfe --- main.nf | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 7419278..5fc23b4 100644 --- a/main.nf +++ b/main.nf @@ -34,13 +34,6 @@ WorkflowMain.initialise(workflow, params, log) include { EURYALE } from './workflows/euryale' include { DOWNLOAD } from './workflows/download' -// -// WORKFLOW: Run main dalmolingroup/euryale analysis pipeline -// -workflow DALMOLINGROUP_EURYALE { - EURYALE () -} - workflow download { DOWNLOAD () } @@ -56,7 +49,7 @@ workflow download { // See: https://github.com/nf-core/rnaseq/issues/619 // workflow { - DALMOLINGROUP_EURYALE () + EURYALE () } /* From 3a092f56926e3cfd79684971df6cc3a68a4c847a Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 3 Jun 2024 18:18:00 -0300 Subject: [PATCH 6/6] fix: Change location for checking input Signed-off-by: jvfe --- workflows/euryale.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/workflows/euryale.nf b/workflows/euryale.nf index d5999fb..11a5414 100644 --- a/workflows/euryale.nf +++ b/workflows/euryale.nf @@ -13,9 +13,6 @@ WorkflowEuryale.initialise(params, log) def checkPathParamList = [ params.input, params.multiqc_config, params.kaiju_db ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' 
} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -72,6 +69,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft def multiqc_report = [] workflow EURYALE { + // Check mandatory parameters + if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.reference_fasta == null && params.diamond_db == null && params.skip_alignment == false) { exit 1, 'A reference fasta (--reference_fasta) or a DIAMOND db (--diamond_db) must be specified' } if (params.run_kaiju == true && params.kaiju_db == null && params.skip_classification == false) {exit 1, 'A Kaiju tar.gz database must be specified with --kaiju_db'} if (params.run_kraken2 == true && params.kraken2_db == null && params.skip_classification == false) {exit 1, 'A Kraken2 database must be specified with --kraken2_db'}