Skip to content

Commit

Permalink
feat: Add the download entry to the pipeline
Browse files Browse the repository at this point in the history
Signed-off-by: jvfe <[email protected]>
  • Loading branch information
jvfe committed Jun 3, 2024
1 parent ae51217 commit 6d0ab8b
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 10 deletions.
12 changes: 4 additions & 8 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

process {

// TODO nf-core: Check the defaults for all processes
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
Expand All @@ -19,13 +18,6 @@ process {
maxRetries = 1
maxErrors = '-1'

// Process-specific resource requirements
// NOTE - Please try and re-use the labels below as much as possible.
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
// If possible, it would be nice to keep the same label naming convention when
// adding in your local modules too.
// TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_single {
cpus = { check_max( 1 , 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
Expand Down Expand Up @@ -59,6 +51,10 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
// Retry failed tasks after an exponentially growing pause: the closure sleeps for
// 2^task.attempt * 200 (e.g. 400 when task.attempt is 1) and then returns 'retry'.
// NOTE(review): the sleep argument is presumably milliseconds (Groovy `sleep(long)`) — confirm.
// At most 3 retries are attempted for processes carrying this label.
withLabel:error_retry_delay {
errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
maxRetries = 3
}
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
Expand Down
31 changes: 31 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,37 @@ process {
]
}

// DOWNLOAD ENTRY
// Each aliased DOWNLOAD process publishes its file under
// ${params.outdir}/references/<category>.
// mode "move" relocates the file out of the task work directory instead of copying it.
// NOTE(review): Nextflow documents "move" as intended only for terminal processes whose
// output is not consumed downstream — confirm nothing reads the `db` output of these processes.
// Both functional downloads (database and dictionary) share the same target directory.
withName: DOWNLOAD_FUNCTIONAL_DB {
publishDir = [
path: { "${params.outdir}/references/functional" },
mode: "move",
]
}
withName: DOWNLOAD_FUNCTIONAL_DICT {
publishDir = [
path: { "${params.outdir}/references/functional" },
mode: "move",
]
}
// Kaiju reference database
withName: DOWNLOAD_KAIJU {
publishDir = [
path: { "${params.outdir}/references/kaiju" },
mode: "move",
]
}
// Kraken2 reference database
withName: DOWNLOAD_KRAKEN {
publishDir = [
path: { "${params.outdir}/references/kraken2" },
mode: "move",
]
}
// Host reference genome
withName: DOWNLOAD_HOST {
publishDir = [
path: { "${params.outdir}/references/host" },
mode: "move",
]
}
// Host removal
withName: BOWTIE2_ALIGN {
ext.args = "--sensitive"
Expand Down
7 changes: 7 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ params {
id_mapping = "$projectDir/test_data/idmapping_selected.tab.example.gz"
reference_fasta = "$projectDir/test_data/protein.faa.gz"

// download entry
functional_db = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/protein.faa.gz'
functional_dictionary = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/idmapping_selected.tab.example.gz'
kaiju_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kaiju/kaiju.tar.gz'
kraken2_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz'
host_url = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/GCA_002596845.1_ASM259684v1_genomic.fna.gz'

// Annotation params
minimum_bitscore = 30
minimum_pident = 30
Expand Down
5 changes: 5 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ WorkflowMain.initialise(workflow, params, log)
*/

include { EURYALE } from './workflows/euryale'
include { DOWNLOAD } from './workflows/download'

//
// WORKFLOW: Run main dalmolingroup/euryale analysis pipeline
Expand All @@ -40,6 +41,10 @@ workflow DALMOLINGROUP_EURYALE {
EURYALE ()
}

//
// WORKFLOW: Run the DOWNLOAD workflow imported from ./workflows/download
// NOTE(review): presumably selected on the command line with `-entry download` — confirm.
//
workflow download {
DOWNLOAD ()
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN ALL WORKFLOWS
Expand Down
25 changes: 25 additions & 0 deletions modules/local/download/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//
// Download a single remote file with wget.
//
// Inputs:
//   id  - used as the task tag and, unless task.ext.prefix is set, as the output file name
//   url - remote location passed to wget
// Output:
//   db  - the downloaded file, named after `prefix`
//
process DOWNLOAD {
    tag "$id"

    label 'process_single'
    label 'error_retry_delay'

    input:
    val id
    val url

    output:
    path "${prefix}", emit: db

    script:
    prefix = task.ext.prefix ?: "${id}"
    // Single-quote both arguments so that shell metacharacters commonly found in
    // URLs (&, ?, ;, $) are passed to wget literally instead of being interpreted.
    """
    wget -O '${prefix}' '${url}'
    """

    stub:
    // `prefix` must be assigned here as well: on a stub run the script block is not
    // evaluated, yet both the command below and the output declaration rely on it.
    prefix = task.ext.prefix ?: "${id}"
    """
    touch '${prefix}'
    """
}
12 changes: 12 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ params {
skip_alignment = false
skip_microview = false

// Download entry options
download_functional = true
download_kaiju = true
download_kraken = false
download_host = false

functional_db = 'https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz'
functional_dictionary = 'https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz'
kaiju_db_url = 'https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz'
kraken2_db_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz'
host_url = 'http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz'

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
60 changes: 58 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,58 @@
}
}
},
"download_entry": {
"title": "Download Entry",
"type": "object",
"description": "",
"default": "",
"properties": {
"download_functional": {
"type": "boolean",
"default": true,
"description": "Whether to download functional references"
},
"download_kaiju": {
"type": "boolean",
"default": true,
"description": "Whether to download the Kaiju reference db"
},
"download_kraken": {
"type": "boolean",
"description": "Whether to download the Kraken2 reference db"
},
"download_host": {
"type": "boolean",
"description": "Whether to download the host reference genome"
},
"functional_db": {
"type": "string",
"default": "https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz",
"description": "Functional reference URL (download entry)"
},
"functional_dictionary": {
"type": "string",
"default": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz",
"description": "Functional dictionary URL (download entry)"
},
"kaiju_db_url": {
"type": "string",
"default": "https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz",
"description": "Kaiju reference URL (download entry)"
},
"kraken2_db_url": {
"type": "string",
"default": "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz",
"description": "Kraken2 reference URL (download entry)"
},
"host_url": {
"type": "string",
"default": "http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz",
"description": "Host FASTA reference URL (download entry)"
}
},
"fa_icon": "fas fa-database"
},
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
Expand Down Expand Up @@ -352,7 +404,8 @@
"type": "string",
"description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
"fa_icon": "fas fa-image",
"hidden": true
"hidden": true,
"default": "/home/jvfe/dev/pesquisa/dalmolingroup-euryale/assets/euryale_logo.png"
},
"multiqc_methods_description": {
"type": "string",
Expand All @@ -362,7 +415,7 @@
"tracedir": {
"type": "string",
"description": "Directory to keep pipeline Nextflow logs and reports.",
"default": "${params.outdir}/pipeline_info",
"fa_icon": "fas fa-cogs",
"hidden": true
},
Expand Down Expand Up @@ -413,6 +466,9 @@
{
"$ref": "#/definitions/reference_genome_options"
},
{
"$ref": "#/definitions/download_entry"
},
{
"$ref": "#/definitions/max_job_request_options"
},
Expand Down
45 changes: 45 additions & 0 deletions workflows/download.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
VALIDATE INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)

// Validate input parameters
WorkflowEuryale.initialise(params, log)

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT LOCAL MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// MODULES
//

include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DB } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DICT } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_KAIJU } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_KRAKEN } from '../modules/local/download/main'
include { DOWNLOAD as DOWNLOAD_HOST } from '../modules/local/download/main'

//
// WORKFLOW: Download reference files. Each group is fetched only when its
// params.download_* flag is truthy. The first argument of every call is the `id`
// given to the DOWNLOAD process; the second is the URL read from params.
//
workflow DOWNLOAD {
// Functional annotation references: database plus its dictionary
if (params.download_functional) {
DOWNLOAD_FUNCTIONAL_DB("reference_fasta.fa.gz", params.functional_db)
DOWNLOAD_FUNCTIONAL_DICT("id_mapping.tab.gz", params.functional_dictionary)
}

// Kaiju reference database archive
if (params.download_kaiju) {
DOWNLOAD_KAIJU("kaiju_db.tar.gz", params.kaiju_db_url)
}

// Kraken2 reference database archive
if (params.download_kraken) {
DOWNLOAD_KRAKEN("kraken2_db.tar.gz", params.kraken2_db_url)
}

// Host reference FASTA
if (params.download_host) {
DOWNLOAD_HOST("host_fasta.fa.gz", params.host_url)
}
}

0 comments on commit 6d0ab8b

Please sign in to comment.