Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add entry to download references #38

Merged
merged 6 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

process {

// TODO nf-core: Check the defaults for all processes
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
Expand All @@ -19,13 +18,6 @@ process {
maxRetries = 1
maxErrors = '-1'

// Process-specific resource requirements
// NOTE - Please try and re-use the labels below as much as possible.
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
// If possible, it would be nice to keep the same label naming convention when
// adding in your local modules too.
// TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_single {
cpus = { check_max( 1 , 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
Expand Down Expand Up @@ -59,6 +51,10 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
withLabel:error_retry_delay {
errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
maxRetries = 3
}
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
Expand Down
31 changes: 31 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,37 @@ process {
]
}

// DOWNLOAD ENTRY
// Publishing rules for the aliased DOWNLOAD processes used by the download
// entry workflow. Each reference is published under
// "${params.outdir}/references/<category>".
// The publish mode is "move" rather than a copy. The Nextflow documentation
// describes this mode as intended only for terminal processes, i.e. ones
// whose outputs are not consumed by a downstream process.
// Functional reference database
withName: DOWNLOAD_FUNCTIONAL_DB {
publishDir = [
path: { "${params.outdir}/references/functional" },
mode: "move",
]
}
// Functional dictionary; shares the directory of the functional database
withName: DOWNLOAD_FUNCTIONAL_DICT {
publishDir = [
path: { "${params.outdir}/references/functional" },
mode: "move",
]
}
// Kaiju reference database
withName: DOWNLOAD_KAIJU {
publishDir = [
path: { "${params.outdir}/references/kaiju" },
mode: "move",
]
}
// Kraken2 reference database
withName: DOWNLOAD_KRAKEN {
publishDir = [
path: { "${params.outdir}/references/kraken2" },
mode: "move",
]
}
// Host reference genome
withName: DOWNLOAD_HOST {
publishDir = [
path: { "${params.outdir}/references/host" },
mode: "move",
]
}
// Host removal
withName: BOWTIE2_ALIGN {
ext.args = "--sensitive"
Expand Down
7 changes: 7 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ params {
id_mapping = "$projectDir/test_data/idmapping_selected.tab.example.gz"
reference_fasta = "$projectDir/test_data/protein.faa.gz"

// download entry
functional_db = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/protein.faa.gz'
functional_dictionary = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/idmapping_selected.tab.example.gz'
kaiju_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kaiju/kaiju.tar.gz'
kraken2_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz'
host_url = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/GCA_002596845.1_ASM259684v1_genomic.fna.gz'

// Annotation params
minimum_bitscore = 30
minimum_pident = 30
Expand Down
16 changes: 16 additions & 0 deletions docs/params.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ Reference genome related files and options required for the workflow.
| `igenomes_ignore` | Do not load the iGenomes reference config. <details><summary>Help</summary><small>Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.</small></details>| `boolean` | | | True |
| `fasta` | | `string` | | | |

## Download Entry



| Parameter | Description | Type | Default | Required | Hidden |
|-----------|-----------|-----------|-----------|-----------|-----------|
| `download_functional` | Whether to download functional references | `boolean` | True | | |
| `download_kaiju` | Whether to download the Kaiju reference db | `boolean` | True | | |
| `download_kraken` | Whether to download the Kraken2 reference db | `boolean` | | | |
| `download_host` | Whether to download the host reference genome | `boolean` | | | |
| `functional_db` | Functional reference URL (download entry) | `string` | https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz | | |
| `functional_dictionary` | Functional dictionary URL (download entry) | `string` | https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz | | |
| `kaiju_db_url` | Kaiju reference URL (download entry) | `string` | https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz | | |
| `kraken2_db_url` | Kraken2 reference URL (download entry) | `string` | https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz | | |
| `host_url` | Host FASTA reference URL (download entry) | `string` | http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz | | |

## Max job request options

Set the top limit for requested resources for any single job.
Expand Down
6 changes: 0 additions & 6 deletions lib/WorkflowMain.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,6 @@ class WorkflowMain {

// Check AWS batch settings
NfcoreTemplate.awsBatch(workflow, params)

// Check input has been provided
if (!params.input) {
log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'"
System.exit(1)
}
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
10 changes: 4 additions & 6 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,10 @@ WorkflowMain.initialise(workflow, params, log)
*/

include { EURYALE } from './workflows/euryale'
include { DOWNLOAD } from './workflows/download'

//
// WORKFLOW: Run main dalmolingroup/euryale analysis pipeline
//
workflow DALMOLINGROUP_EURYALE {
EURYALE ()
workflow download {
DOWNLOAD ()
}

/*
Expand All @@ -51,7 +49,7 @@ workflow DALMOLINGROUP_EURYALE {
// See: https://github.com/nf-core/rnaseq/issues/619
//
workflow {
DALMOLINGROUP_EURYALE ()
EURYALE ()
}

/*
Expand Down
25 changes: 25 additions & 0 deletions modules/local/download/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//
// DOWNLOAD: fetch a single remote file with wget.
//
// Inputs:
//   id  - name given to the downloaded file (unless task.ext.prefix overrides it);
//         also used as the task tag
//   url - location of the file to download
//
// Output:
//   db  - the downloaded file, named after `prefix`
//
// The 'error_retry_delay' label is applied in addition to 'process_single';
// the behaviour of both labels is defined in the pipeline configuration.
//
process DOWNLOAD {
    tag "$id"

    label 'process_single'
    label 'error_retry_delay'

    input:
    val id
    val url

    output:
    path "${prefix}", emit: db

    script:
    // No `def` on purpose: `prefix` must be visible to the output declaration above.
    prefix = task.ext.prefix ?: "${id}"
    // Both arguments are single-quoted so the shell does not interpret
    // characters such as '&', '?', ';' or '$' that commonly occur in URLs.
    """
    wget -O '${prefix}' '${url}'
    """

    stub:
    // The script section is not evaluated in stub mode, so `prefix` has to be
    // assigned here as well; otherwise it is undefined for both the command
    // below and the output path.
    prefix = task.ext.prefix ?: "${id}"
    """
    touch '${prefix}'
    """
}
12 changes: 12 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ params {
skip_alignment = false
skip_microview = false

// Download entry options
download_functional = true
download_kaiju = true
download_kraken = false
download_host = false

functional_db = 'https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz'
functional_dictionary = 'https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz'
kaiju_db_url = 'https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz'
kraken2_db_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz'
host_url = 'http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz'

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
59 changes: 55 additions & 4 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": [
"input",
"outdir"
],
"properties": {
Expand Down Expand Up @@ -154,9 +153,6 @@
"description": "Run Kraken2 classifier"
}
},
"required": [
"kaiju_db"
],
"fa_icon": "fab fa-pagelines"
},
"functional": {
Expand Down Expand Up @@ -237,6 +233,58 @@
}
}
},
"download_entry": {
"title": "Download Entry",
"type": "object",
"description": "",
"default": "",
"properties": {
"download_functional": {
"type": "boolean",
"default": true,
"description": "Whether to download functional references"
},
"download_kaiju": {
"type": "boolean",
"default": true,
"description": "Whether to download the Kaiju reference db"
},
"download_kraken": {
"type": "boolean",
"description": "Whether to download the Kraken2 reference db"
},
"download_host": {
"type": "boolean",
"description": "Whether to download the host reference genome"
},
"functional_db": {
"type": "string",
"default": "https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz",
"description": "Functional reference URL (download entry)"
},
"functional_dictionary": {
"type": "string",
"default": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz",
"description": "Functional dictionary URL (download entry)"
},
"kaiju_db_url": {
"type": "string",
"default": "https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz",
"description": "Kaiju reference URL (download entry)"
},
"kraken2_db_url": {
"type": "string",
"default": "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz",
"description": "Kraken2 reference URL (download entry)"
},
"host_url": {
"type": "string",
"default": "http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz",
"description": "Host FASTA reference URL (download entry)"
}
},
"fa_icon": "fas fa-database"
},
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
Expand Down Expand Up @@ -413,6 +461,9 @@
{
"$ref": "#/definitions/reference_genome_options"
},
{
"$ref": "#/definitions/download_entry"
},
{
"$ref": "#/definitions/max_job_request_options"
},
Expand Down
45 changes: 45 additions & 0 deletions workflows/download.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
VALIDATE INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)

// Validate input parameters
WorkflowEuryale.initialise(params, log)

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT LOCAL MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// MODULES
//

include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DB } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DICT } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_KAIJU } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_KRAKEN } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_HOST } from '../modules/local/download/main'

// Entry workflow that downloads the reference files used by the pipeline.
// Each group of references is gated by its own boolean parameter
// (params.download_*). Every call passes the DOWNLOAD process an id (first
// argument) and a source URL (second argument); the process uses the id as
// the output file name unless task.ext.prefix overrides it.
workflow DOWNLOAD {
// Functional references: the functional database and its dictionary are
// always fetched together.
if (params.download_functional) {
DOWNLOAD_FUNCTIONAL_DB("reference_fasta.fa.gz", params.functional_db)
DOWNLOAD_FUNCTIONAL_DICT("id_mapping.tab.gz", params.functional_dictionary)
}

// Kaiju reference database
if (params.download_kaiju) {
DOWNLOAD_KAIJU("kaiju_db.tar.gz", params.kaiju_db_url)
}

// Kraken2 reference database
if (params.download_kraken) {
DOWNLOAD_KRAKEN("kraken2_db.tar.gz", params.kraken2_db_url)
}

// Host reference genome
if (params.download_host) {
DOWNLOAD_HOST("host_fasta.fa.gz", params.host_url)
}
}
5 changes: 2 additions & 3 deletions workflows/euryale.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ WorkflowEuryale.initialise(params, log)
def checkPathParamList = [ params.input, params.multiqc_config, params.kaiju_db ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// Check mandatory parameters
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
Expand Down Expand Up @@ -72,6 +69,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft
def multiqc_report = []

workflow EURYALE {
// Check mandatory parameters
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.reference_fasta == null && params.diamond_db == null && params.skip_alignment == false) { exit 1, 'A reference fasta (--reference_fasta) or a DIAMOND db (--diamond_db) must be specified' }
if (params.run_kaiju == true && params.kaiju_db == null && params.skip_classification == false) {exit 1, 'A Kaiju tar.gz database must be specified with --kaiju_db'}
if (params.run_kraken2 == true && params.kraken2_db == null && params.skip_classification == false) {exit 1, 'A Kraken2 database must be specified with --kraken2_db'}
Expand Down
Loading