From 7707cfbe7cc2d7677460704cb4725bf36698f527 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 6 Feb 2024 15:41:14 +0100 Subject: [PATCH 1/7] add script and config to list busco datasets --- src/busco/{ => busco}/config.vsh.yaml | 0 src/busco/{ => busco}/help.txt | 0 src/busco/{ => busco}/script.sh | 0 src/busco/{ => busco}/test.sh | 0 src/busco/{ => busco}/test_data/genome.fna | 0 src/busco/{ => busco}/test_data/protein.fasta | 0 src/busco/{ => busco}/test_data/script.sh | 0 src/busco/busco_list_datasets/config.vsh.yaml | 36 +++++++++++++++++++ src/busco/busco_list_datasets/script.sh | 6 ++++ src/busco/busco_list_datasets/test.sh | 6 ++++ 10 files changed, 48 insertions(+) rename src/busco/{ => busco}/config.vsh.yaml (100%) rename src/busco/{ => busco}/help.txt (100%) rename src/busco/{ => busco}/script.sh (100%) rename src/busco/{ => busco}/test.sh (100%) rename src/busco/{ => busco}/test_data/genome.fna (100%) rename src/busco/{ => busco}/test_data/protein.fasta (100%) rename src/busco/{ => busco}/test_data/script.sh (100%) create mode 100644 src/busco/busco_list_datasets/config.vsh.yaml create mode 100644 src/busco/busco_list_datasets/script.sh create mode 100644 src/busco/busco_list_datasets/test.sh diff --git a/src/busco/config.vsh.yaml b/src/busco/busco/config.vsh.yaml similarity index 100% rename from src/busco/config.vsh.yaml rename to src/busco/busco/config.vsh.yaml diff --git a/src/busco/help.txt b/src/busco/busco/help.txt similarity index 100% rename from src/busco/help.txt rename to src/busco/busco/help.txt diff --git a/src/busco/script.sh b/src/busco/busco/script.sh similarity index 100% rename from src/busco/script.sh rename to src/busco/busco/script.sh diff --git a/src/busco/test.sh b/src/busco/busco/test.sh similarity index 100% rename from src/busco/test.sh rename to src/busco/busco/test.sh diff --git a/src/busco/test_data/genome.fna b/src/busco/busco/test_data/genome.fna similarity index 100% rename from src/busco/test_data/genome.fna rename 
to src/busco/busco/test_data/genome.fna diff --git a/src/busco/test_data/protein.fasta b/src/busco/busco/test_data/protein.fasta similarity index 100% rename from src/busco/test_data/protein.fasta rename to src/busco/busco/test_data/protein.fasta diff --git a/src/busco/test_data/script.sh b/src/busco/busco/test_data/script.sh similarity index 100% rename from src/busco/test_data/script.sh rename to src/busco/busco/test_data/script.sh diff --git a/src/busco/busco_list_datasets/config.vsh.yaml b/src/busco/busco_list_datasets/config.vsh.yaml new file mode 100644 index 00000000..444e2a6d --- /dev/null +++ b/src/busco/busco_list_datasets/config.vsh.yaml @@ -0,0 +1,36 @@ +functionality: + name: busco_list_datasets + description: Lists the available busco datasets + info: + keywords: [lineage datasets] + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + repository: https://gitlab.com/ezlab/busco + reference: "10.1007/978-1-4939-9173-0_14" + licence: MIT + argument_groups: + - name: Outputs + arguments: + - name: --output + alternatives: ["-o"] + direction: output + type: file + description: | + Output file of the available busco datasets + required: false + default: busco_dataset_list.txt + example: file.txt + resources: + - type: bash_script + path: script.sh + test_resources: + - type: bash_script + path: test.sh +platforms: + - type: docker + image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0 + setup: + - type: docker + run: | + busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_versions.txt + - type: nextflow diff --git a/src/busco/busco_list_datasets/script.sh b/src/busco/busco_list_datasets/script.sh new file mode 100644 index 00000000..6c80725c --- /dev/null +++ b/src/busco/busco_list_datasets/script.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +busco --list-datasets | awk '/^#{40}/{flag=1; next} flag{print}' > "$par_output" \ No newline at end of file diff --git 
a/src/busco/busco_list_datasets/test.sh b/src/busco/busco_list_datasets/test.sh new file mode 100644 index 00000000..053f41bc --- /dev/null +++ b/src/busco/busco_list_datasets/test.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +busco --list-datasets > $par_output \ No newline at end of file From a858cc1ecd3cb69b82160d298b93f2e12476d55d Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 6 Feb 2024 15:46:14 +0100 Subject: [PATCH 2/7] update test --- src/busco/busco_list_datasets/test.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/busco/busco_list_datasets/test.sh b/src/busco/busco_list_datasets/test.sh index 053f41bc..c67a7801 100644 --- a/src/busco/busco_list_datasets/test.sh +++ b/src/busco/busco_list_datasets/test.sh @@ -3,4 +3,11 @@ ## VIASH START ## VIASH END -busco --list-datasets > $par_output \ No newline at end of file +"$meta_executable" \ + --output datasets.txt + +echo ">> Checking output" +[ ! -f "datasets.txt" ] && echo "datasets.txt does not exist" && exit 1 + +echo ">> Checking if output is empty" +[ ! -s "datasets.txt" ] && echo "datasets.txt is empty" && exit 1 From 1920c9ac931e0b9b6f1ea0c7eef1ca8bd600a04b Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 6 Feb 2024 15:54:30 +0100 Subject: [PATCH 3/7] update busco to point to busco_list_datasets component --- src/busco/busco/config.vsh.yaml | 2 +- src/busco/busco_list_datasets/test.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/busco/busco/config.vsh.yaml b/src/busco/busco/config.vsh.yaml index fba14892..7db602d5 100644 --- a/src/busco/busco/config.vsh.yaml +++ b/src/busco/busco/config.vsh.yaml @@ -35,7 +35,7 @@ functionality: required: false description: | Specify a BUSCO lineage dataset that is most closely related to the assembly or gene set being assessed. 
- The full list of available datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running `busco --list-datasets` (which requires installing the tool). + The full list of available datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the busco/busco_list_datasets component. When unsure, the "--auto_lineage" flag can be set to automatically find the optimal lineage path. Requested datasets will automatically be downloaded if not already present in the download folder. example: stramenopiles_odb10 diff --git a/src/busco/busco_list_datasets/test.sh b/src/busco/busco_list_datasets/test.sh index c67a7801..c303cd77 100644 --- a/src/busco/busco_list_datasets/test.sh +++ b/src/busco/busco_list_datasets/test.sh @@ -11,3 +11,5 @@ echo ">> Checking output" echo ">> Checking if output is empty" [ ! -s "datasets.txt" ] && echo "datasets.txt is empty" && exit 1 + +rm datasets.txt \ No newline at end of file From aa7d5987ada24d2b3570da609311278bfe083fb2 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 6 Feb 2024 16:05:27 +0100 Subject: [PATCH 4/7] add changelog entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 276195c2..4255bd7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ * `busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). +* `busco_list_datasets`: Lists available busco datasets (PR #18) + * `featurecounts`: Assign sequence reads to genomic features (PR #11). * `bgzip`: Add bgzip functionality to compress and decompress files (PR #13). 
From feaa98c46ff10e787b69c8b1e76c47d8fdf06798 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Fri, 9 Feb 2024 17:59:49 +0100 Subject: [PATCH 5/7] update changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4255bd7b..accf1ff1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,9 @@ * `fastp`: An ultra-fast all-in-one FASTQ preprocessor (PR #3). -* `busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). - -* `busco_list_datasets`: Lists available busco datasets (PR #18) +* `busco`: + - `busco/busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). + - `busco_list_datasets`: Lists available busco datasets (PR #18) * `featurecounts`: Assign sequence reads to genomic features (PR #11). From 023140afb23a992d7c480dc4dfec77077a85bd8b Mon Sep 17 00:00:00 2001 From: Dorien <41797896+dorien-er@users.noreply.github.com> Date: Sat, 10 Feb 2024 14:06:13 +0100 Subject: [PATCH 6/7] Update CHANGELOG.md Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index accf1ff1..9d31a2da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ * `fastp`: An ultra-fast all-in-one FASTQ preprocessor (PR #3). * `busco`: - - `busco/busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). - - `busco_list_datasets`: Lists available busco datasets (PR #18) + - `busco/busco_run`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). + - `busco/busco_list_datasets`: Lists available busco datasets (PR #18). * `featurecounts`: Assign sequence reads to genomic features (PR #11). 
From cf131d4af49868ae16ba27f28c6bdf1d84a5aa54 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Sat, 10 Feb 2024 14:08:11 +0100 Subject: [PATCH 7/7] rename busco to busco run --- src/busco/{busco => busco_run}/config.vsh.yaml | 2 +- src/busco/{busco => busco_run}/help.txt | 0 src/busco/{busco => busco_run}/script.sh | 0 src/busco/{busco => busco_run}/test.sh | 0 src/busco/{busco => busco_run}/test_data/genome.fna | 0 src/busco/{busco => busco_run}/test_data/protein.fasta | 0 src/busco/{busco => busco_run}/test_data/script.sh | 0 7 files changed, 1 insertion(+), 1 deletion(-) rename src/busco/{busco => busco_run}/config.vsh.yaml (99%) rename src/busco/{busco => busco_run}/help.txt (100%) rename src/busco/{busco => busco_run}/script.sh (100%) rename src/busco/{busco => busco_run}/test.sh (100%) rename src/busco/{busco => busco_run}/test_data/genome.fna (100%) rename src/busco/{busco => busco_run}/test_data/protein.fasta (100%) rename src/busco/{busco => busco_run}/test_data/script.sh (100%) diff --git a/src/busco/busco/config.vsh.yaml b/src/busco/busco_run/config.vsh.yaml similarity index 99% rename from src/busco/busco/config.vsh.yaml rename to src/busco/busco_run/config.vsh.yaml index 7db602d5..2297fc2d 100644 --- a/src/busco/busco/config.vsh.yaml +++ b/src/busco/busco_run/config.vsh.yaml @@ -1,5 +1,5 @@ functionality: - name: busco + name: busco_run description: Assessment of genome assembly and annotation completeness with single copy orthologs info: keywords: [Genome assembly, quality control] diff --git a/src/busco/busco/help.txt b/src/busco/busco_run/help.txt similarity index 100% rename from src/busco/busco/help.txt rename to src/busco/busco_run/help.txt diff --git a/src/busco/busco/script.sh b/src/busco/busco_run/script.sh similarity index 100% rename from src/busco/busco/script.sh rename to src/busco/busco_run/script.sh diff --git a/src/busco/busco/test.sh b/src/busco/busco_run/test.sh similarity index 100% rename from src/busco/busco/test.sh rename to 
src/busco/busco_run/test.sh diff --git a/src/busco/busco/test_data/genome.fna b/src/busco/busco_run/test_data/genome.fna similarity index 100% rename from src/busco/busco/test_data/genome.fna rename to src/busco/busco_run/test_data/genome.fna diff --git a/src/busco/busco/test_data/protein.fasta b/src/busco/busco_run/test_data/protein.fasta similarity index 100% rename from src/busco/busco/test_data/protein.fasta rename to src/busco/busco_run/test_data/protein.fasta diff --git a/src/busco/busco/test_data/script.sh b/src/busco/busco_run/test_data/script.sh similarity index 100% rename from src/busco/busco/test_data/script.sh rename to src/busco/busco_run/test_data/script.sh