Skip to content

Commit

Permalink
feat: Add entry to download references (#38)
Browse files Browse the repository at this point in the history
* feat: Add the download entry to the pipeline

* docs: Add params for the download entry

* fix: Remove requirement for input and kaiju_db

* fix: Remove check for input in workflowmain

* fix: Declare euryale after download

* fix: Change location for checking input

---------

Signed-off-by: jvfe <[email protected]>
  • Loading branch information
jvfe authored Jun 4, 2024
1 parent ae51217 commit 53b2076
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 27 deletions.
12 changes: 4 additions & 8 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

process {

// TODO nf-core: Check the defaults for all processes
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
Expand All @@ -19,13 +18,6 @@ process {
maxRetries = 1
maxErrors = '-1'

// Process-specific resource requirements
// NOTE - Please try and re-use the labels below as much as possible.
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
// If possible, it would be nice to keep the same label naming convention when
// adding in your local modules too.
// TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_single {
cpus = { check_max( 1 , 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
Expand Down Expand Up @@ -59,6 +51,10 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
// Retry-with-back-off policy for processes labelled 'error_retry_delay'.
// The errorStrategy closure first sleeps, then returns 'retry'; the delay
// grows with each attempt as 2^task.attempt * 200, coerced to a long.
// NOTE(review): Groovy's sleep() takes milliseconds, giving roughly 400, 800, 1600 ms — confirm.
withLabel:error_retry_delay {
errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
// Allow up to three retries per task.
maxRetries = 3
}
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
Expand Down
31 changes: 31 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,37 @@ process {
]
}

// DOWNLOAD ENTRY
// Publishing rules for the aliased DOWNLOAD processes used by the download
// entry workflow. Each alias publishes under ${params.outdir}/references/<kind>.
// NOTE(review): mode "move" relocates the file out of the task work directory
// instead of copying it; Nextflow documents this mode as intended only for
// processes whose outputs are not consumed by a downstream process — confirm
// this stays true if these processes are ever chained.

// Functional reference database -> references/functional
withName: DOWNLOAD_FUNCTIONAL_DB {
publishDir = [
path: { "${params.outdir}/references/functional" },
mode: "move",
]
}
// Functional ID-mapping dictionary -> references/functional (same directory as the database)
withName: DOWNLOAD_FUNCTIONAL_DICT {
publishDir = [
path: { "${params.outdir}/references/functional" },
mode: "move",
]
}
// Kaiju database -> references/kaiju
withName: DOWNLOAD_KAIJU {
publishDir = [
path: { "${params.outdir}/references/kaiju" },
mode: "move",
]
}
// Kraken2 database -> references/kraken2
withName: DOWNLOAD_KRAKEN {
publishDir = [
path: { "${params.outdir}/references/kraken2" },
mode: "move",
]
}
// Host reference genome -> references/host
withName: DOWNLOAD_HOST {
publishDir = [
path: { "${params.outdir}/references/host" },
mode: "move",
]
}
// Host removal
withName: BOWTIE2_ALIGN {
ext.args = "--sensitive"
Expand Down
7 changes: 7 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ params {
id_mapping = "$projectDir/test_data/idmapping_selected.tab.example.gz"
reference_fasta = "$projectDir/test_data/protein.faa.gz"

// download entry
functional_db = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/protein.faa.gz'
functional_dictionary = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/idmapping_selected.tab.example.gz'
kaiju_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kaiju/kaiju.tar.gz'
kraken2_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz'
host_url = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/GCA_002596845.1_ASM259684v1_genomic.fna.gz'

// Annotation params
minimum_bitscore = 30
minimum_pident = 30
Expand Down
16 changes: 16 additions & 0 deletions docs/params.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ Reference genome related files and options required for the workflow.
| `igenomes_ignore` | Do not load the iGenomes reference config. <details><summary>Help</summary><small>Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.</small></details>| `boolean` | | | True |
| `fasta` | | `string` | | | |

## Download Entry



| Parameter | Description | Type | Default | Required | Hidden |
|-----------|-----------|-----------|-----------|-----------|-----------|
| `download_functional` | Whether to download functional references | `boolean` | True | | |
| `download_kaiju` | Whether to download the Kaiju reference db | `boolean` | True | | |
| `download_kraken` | Whether to download the Kraken2 reference db | `boolean` | | | |
| `download_host` | Whether to download the host reference genome | `boolean` | | | |
| `functional_db` | Functional reference URL (download entry) | `string` | https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz | | |
| `functional_dictionary` | Functional dictionary URL (download entry) | `string` | https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz | | |
| `kaiju_db_url` | Kaiju reference URL (download entry) | `string` | https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz | | |
| `kraken2_db_url` | Kraken2 reference URL (download entry) | `string` | https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz | | |
| `host_url` | Host FASTA reference URL (download entry) | `string` | http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz | | |

## Max job request options

Set the top limit for requested resources for any single job.
Expand Down
6 changes: 0 additions & 6 deletions lib/WorkflowMain.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,6 @@ class WorkflowMain {

// Check AWS batch settings
NfcoreTemplate.awsBatch(workflow, params)

// Check input has been provided
if (!params.input) {
log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'"
System.exit(1)
}
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
10 changes: 4 additions & 6 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,10 @@ WorkflowMain.initialise(workflow, params, log)
*/

include { EURYALE } from './workflows/euryale'
include { DOWNLOAD } from './workflows/download'

//
// WORKFLOW: Run main dalmolingroup/euryale analysis pipeline
//
workflow DALMOLINGROUP_EURYALE {
EURYALE ()
workflow download {
DOWNLOAD ()
}

/*
Expand All @@ -51,7 +49,7 @@ workflow DALMOLINGROUP_EURYALE {
// See: https://github.com/nf-core/rnaseq/issues/619
//
workflow {
DALMOLINGROUP_EURYALE ()
EURYALE ()
}

/*
Expand Down
25 changes: 25 additions & 0 deletions modules/local/download/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
 * DOWNLOAD: fetch a single remote file with wget.
 *
 * Inputs:
 *   id  - value used as the task tag and, unless task.ext.prefix is set,
 *         as the name of the downloaded file
 *   url - location handed to wget
 *
 * Output:
 *   db  - the downloaded file, named after the resolved prefix
 *
 * Carries the 'process_single' and 'error_retry_delay' labels, so resources
 * and retry behaviour come from the configuration attached to those labels.
 */
process DOWNLOAD {
    tag "$id"

    label 'process_single'
    label 'error_retry_delay'

    input:
    val id
    val url

    output:
    path "${prefix}", emit: db

    script:
    prefix = task.ext.prefix ?: "${id}"

    // Single-quote both arguments so URL characters such as '&', '?' or ';'
    // are passed to wget verbatim instead of being interpreted by the shell.
    """
    wget -O '${prefix}' '${url}'
    """

    stub:
    // The script: section is not evaluated in stub mode, so prefix must be
    // resolved here as well; otherwise the touch target and the declared
    // output path would both refer to an undefined value.
    prefix = task.ext.prefix ?: "${id}"

    """
    touch '${prefix}'
    """
}
12 changes: 12 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ params {
skip_alignment = false
skip_microview = false

// Download entry options
download_functional = true
download_kaiju = true
download_kraken = false
download_host = false

functional_db = 'https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz'
functional_dictionary = 'https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz'
kaiju_db_url = 'https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz'
kraken2_db_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz'
host_url = 'http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz'

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
59 changes: 55 additions & 4 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": [
"input",
"outdir"
],
"properties": {
Expand Down Expand Up @@ -154,9 +153,6 @@
"description": "Run Kraken2 classifier"
}
},
"required": [
"kaiju_db"
],
"fa_icon": "fab fa-pagelines"
},
"functional": {
Expand Down Expand Up @@ -237,6 +233,58 @@
}
}
},
"download_entry": {
"title": "Download Entry",
"type": "object",
"description": "",
"default": "",
"properties": {
"download_functional": {
"type": "boolean",
"default": true,
"description": "Whether to download functional references"
},
"download_kaiju": {
"type": "boolean",
"default": true,
"description": "Whether to download the Kaiju reference db"
},
"download_kraken": {
"type": "boolean",
"description": "Whether to download the Kraken2 reference db"
},
"download_host": {
"type": "boolean",
"description": "Whether to download the host reference genome"
},
"functional_db": {
"type": "string",
"default": "https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz",
"description": "Functional reference URL (download entry)"
},
"functional_dictionary": {
"type": "string",
"default": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz",
"description": "Functional dictionary URL (download entry)"
},
"kaiju_db_url": {
"type": "string",
"default": "https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz",
"description": "Kaiju reference URL (download entry)"
},
"kraken2_db_url": {
"type": "string",
"default": "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz",
"description": "Kraken2 reference URL (download entry)"
},
"host_url": {
"type": "string",
"default": "http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz",
"description": "Host FASTA reference URL (download entry)"
}
},
"fa_icon": "fas fa-database"
},
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
Expand Down Expand Up @@ -413,6 +461,9 @@
{
"$ref": "#/definitions/reference_genome_options"
},
{
"$ref": "#/definitions/download_entry"
},
{
"$ref": "#/definitions/max_job_request_options"
},
Expand Down
45 changes: 45 additions & 0 deletions workflows/download.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
VALIDATE INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)

// Validate input parameters
WorkflowEuryale.initialise(params, log)

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT LOCAL MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// MODULES
//

include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DB } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DICT } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_KAIJU } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_KRAKEN } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_HOST } from '../modules/local/download/main'

//
// WORKFLOW: download the reference files selected through the
// params.download_* switches. Every download runs through its own alias of
// the DOWNLOAD process, receiving a fixed output file name and the URL taken
// from the matching parameter.
//
workflow DOWNLOAD {

    main:

    // Functional annotation references: the database plus its ID-mapping dictionary
    if (params.download_functional) {
        DOWNLOAD_FUNCTIONAL_DB(
            "reference_fasta.fa.gz",
            params.functional_db
        )
        DOWNLOAD_FUNCTIONAL_DICT(
            "id_mapping.tab.gz",
            params.functional_dictionary
        )
    }

    // Kaiju database archive
    if (params.download_kaiju) {
        DOWNLOAD_KAIJU(
            "kaiju_db.tar.gz",
            params.kaiju_db_url
        )
    }

    // Kraken2 database archive
    if (params.download_kraken) {
        DOWNLOAD_KRAKEN(
            "kraken2_db.tar.gz",
            params.kraken2_db_url
        )
    }

    // Host reference genome FASTA
    if (params.download_host) {
        DOWNLOAD_HOST(
            "host_fasta.fa.gz",
            params.host_url
        )
    }
}
5 changes: 2 additions & 3 deletions workflows/euryale.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ WorkflowEuryale.initialise(params, log)
def checkPathParamList = [ params.input, params.multiqc_config, params.kaiju_db ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// Check mandatory parameters
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
Expand Down Expand Up @@ -72,6 +69,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft
def multiqc_report = []

workflow EURYALE {
// Check mandatory parameters
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.reference_fasta == null && params.diamond_db == null && params.skip_alignment == false) { exit 1, 'A reference fasta (--reference_fasta) or a DIAMOND db (--diamond_db) must be specified' }
if (params.run_kaiju == true && params.kaiju_db == null && params.skip_classification == false) {exit 1, 'A Kaiju tar.gz database must be specified with --kaiju_db'}
if (params.run_kraken2 == true && params.kraken2_db == null && params.skip_classification == false) {exit 1, 'A Kraken2 database must be specified with --kraken2_db'}
Expand Down

0 comments on commit 53b2076

Please sign in to comment.