From 42e3d600f55aa130a9a5953a199298a76f32c786 Mon Sep 17 00:00:00 2001 From: MartinPippel Date: Mon, 15 Jan 2024 14:31:58 +0100 Subject: [PATCH 1/3] add development branch - and start to work with gitpod --- config/annotation_preprocessing_modules.config | 2 +- modules.json | 2 +- modules/nf-core/busco/environment.yml | 7 +++++++ modules/nf-core/busco/main.nf | 13 +++++++++---- modules/nf-core/busco/meta.yml | 13 ++++++++++--- subworkflows/annotation_preprocessing/main.nf | 3 ++- 6 files changed, 30 insertions(+), 10 deletions(-) create mode 100644 modules/nf-core/busco/environment.yml diff --git a/config/annotation_preprocessing_modules.config b/config/annotation_preprocessing_modules.config index b73fd818..3c202dc9 100644 --- a/config/annotation_preprocessing_modules.config +++ b/config/annotation_preprocessing_modules.config @@ -19,7 +19,7 @@ process { withName: 'BUSCO' { time = 2.d tag = { "$lineage:$meta.id" } - ext.args = '--mode genome --tar' + ext.args = '--tar' publishDir = [ path: "${params.outdir}/${publish_subdir}/busco", mode: params.publishDir_mode, diff --git a/modules.json b/modules.json index bd029df7..06919696 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "busco": { "branch": "master", - "git_sha": "6d6552cb582f56b6101c452e16ee7c23073f91de", + "git_sha": "e3126f437c336c826f242842fe51769cfce0ec2d", "installed_by": ["modules"] }, "fastp": { diff --git a/modules/nf-core/busco/environment.yml b/modules/nf-core/busco/environment.yml new file mode 100644 index 00000000..f872d057 --- /dev/null +++ b/modules/nf-core/busco/environment.yml @@ -0,0 +1,7 @@ +name: busco +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::busco=5.5.0 diff --git a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/main.nf index 95586b03..e7100405 100644 --- a/modules/nf-core/busco/main.nf +++ b/modules/nf-core/busco/main.nf @@ -2,13 +2,14 @@ process BUSCO { tag "$meta.id" label 'process_medium' - conda "bioconda::busco=5.4.3" 
+ conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/busco:5.4.3--pyhdfd78af_0': - 'biocontainers/busco:5.4.3--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/busco:5.5.0--pyhdfd78af_0': + 'biocontainers/busco:5.5.0--pyhdfd78af_0' }" input: tuple val(meta), path('tmp_input/*') + val mode // Required: One of genome, proteins, or transcriptome val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead path busco_lineages_path // Recommended: path to busco lineages - downloads if not set path config_file // Optional: busco configuration file @@ -29,11 +30,14 @@ process BUSCO { task.ext.when == null || task.ext.when script: + if ( mode !in [ 'genome', 'proteins', 'transcriptome' ] ) { + error "Mode must be one of 'genome', 'proteins', or 'transcriptome'." + } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def busco_config = config_file ? "--config $config_file" : '' def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}" - def busco_lineage_dir = busco_lineages_path ? "--offline --download_path ${busco_lineages_path}" : '' + def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : '' """ # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Check for container variable initialisation script and source it. 
@@ -69,6 +73,7 @@ process BUSCO { --cpu $task.cpus \\ --in "\$INPUT_SEQS" \\ --out ${prefix}-busco \\ + --mode $mode \\ $busco_lineage \\ $busco_lineage_dir \\ $busco_config \\ diff --git a/modules/nf-core/busco/meta.yml b/modules/nf-core/busco/meta.yml index 77d15fbd..90b30d4d 100644 --- a/modules/nf-core/busco/meta.yml +++ b/modules/nf-core/busco/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: https://gitlab.com/ezlab/busco doi: "10.1007/978-1-4939-9173-0_14" licence: ["MIT"] - input: - meta: type: map @@ -24,6 +23,10 @@ input: type: file description: Nucleic or amino acid sequence file in FASTA format. pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" + - mode: + type: string + description: The mode to run Busco in. One of genome, proteins, or transcriptome + pattern: "{genome,proteins,transcriptome}" - lineage: type: string description: The BUSCO lineage to use, or "auto" to automatically select lineage @@ -33,7 +36,6 @@ input: - config_file: type: file description: Path to BUSCO config file. - output: - meta: type: map @@ -80,10 +82,15 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@priyanka-surana" - "@charles-plessy" - "@mahesh-panchal" - "@muffato" - "@jvhagey" +maintainers: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" diff --git a/subworkflows/annotation_preprocessing/main.nf b/subworkflows/annotation_preprocessing/main.nf index 25b42426..60dcaaae 100644 --- a/subworkflows/annotation_preprocessing/main.nf +++ b/subworkflows/annotation_preprocessing/main.nf @@ -21,9 +21,10 @@ workflow ANNOTATION_PREPROCESSING { ASSEMBLY_PURIFY.out.fasta .combine( ch_busco_lineage ) .multiMap { fasta, lineage -> - ch_fasta: [ [ id: fasta.baseName ], fasta ] + ch_fasta: [ [ id: fasta.baseName ], fasta ] ch_busco: lineage }, + 'genome', params.busco_lineages_path ? 
file( params.busco_lineages_path, checkIfExists: true ) : [], [] ) From 69d5e7c47f013dd4b78b24b3fa38a2f5051b4416 Mon Sep 17 00:00:00 2001 From: Martin Pippel Date: Mon, 15 Jan 2024 14:27:29 +0000 Subject: [PATCH 2/3] adapt annotation_preprocessing subworkflow to new busco pipeline --- subworkflows/annotation_preprocessing/main.nf | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/subworkflows/annotation_preprocessing/main.nf b/subworkflows/annotation_preprocessing/main.nf index 60dcaaae..c404d36a 100644 --- a/subworkflows/annotation_preprocessing/main.nf +++ b/subworkflows/annotation_preprocessing/main.nf @@ -17,14 +17,17 @@ workflow ANNOTATION_PREPROCESSING { ASSEMBLY_PURIFY( genome_assembly ) ASSEMBLY_STATS( genome_assembly.mix( ASSEMBLY_PURIFY.out.fasta ) ) - BUSCO( - ASSEMBLY_PURIFY.out.fasta - .combine( ch_busco_lineage ) - .multiMap { fasta, lineage -> - ch_fasta: [ [ id: fasta.baseName ], fasta ] - ch_busco: lineage - }, - 'genome', + + ch_busco_in = ASSEMBLY_PURIFY.out.fasta + .combine( ch_busco_lineage ) + .multiMap { fasta, lineage -> + fasta: [ [ id: fasta.baseName ], fasta ] + lineage: lineage + } + BUSCO ( + ch_busco_in.fasta, + "genome", + ch_busco_in.lineage, params.busco_lineages_path ? 
file( params.busco_lineages_path, checkIfExists: true ) : [], [] ) From 9838f76e46939dac22049732648a730336f04867 Mon Sep 17 00:00:00 2001 From: Martin Pippel Date: Mon, 15 Jan 2024 17:16:28 +0100 Subject: [PATCH 3/3] Update subworkflows/annotation_preprocessing/main.nf Co-authored-by: Mahesh Binzer-Panchal --- subworkflows/annotation_preprocessing/main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/annotation_preprocessing/main.nf b/subworkflows/annotation_preprocessing/main.nf index c404d36a..d85c240d 100644 --- a/subworkflows/annotation_preprocessing/main.nf +++ b/subworkflows/annotation_preprocessing/main.nf @@ -19,11 +19,11 @@ workflow ANNOTATION_PREPROCESSING { ASSEMBLY_STATS( genome_assembly.mix( ASSEMBLY_PURIFY.out.fasta ) ) ch_busco_in = ASSEMBLY_PURIFY.out.fasta - .combine( ch_busco_lineage ) - .multiMap { fasta, lineage -> - fasta: [ [ id: fasta.baseName ], fasta ] - lineage: lineage - } + .combine( ch_busco_lineage ) + .multiMap { fasta, lineage -> + fasta: [ [ id: fasta.baseName ], fasta ] + lineage: lineage + } BUSCO ( ch_busco_in.fasta, "genome",