From 1f076bdff73cb9af8151ae3b014f113e69e66f4d Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Mon, 1 Jul 2024 20:35:08 +0200 Subject: [PATCH 1/2] FEAT + BUG: cutadapt; allowing disabling demultiplexing and fix par_quality_cutoff_r2 (#69) * FEAT: Disable cutadapt demultiplexing by default * Cutadapt: fix --par_quality_cutoff_r2 --- CHANGELOG.md | 5 +++++ src/cutadapt/config.vsh.yaml | 18 ++++++++++++++++++ src/cutadapt/script.sh | 31 ++++++++++++++++++++++++++----- src/cutadapt/test.sh | 5 +++++ 4 files changed, 54 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bfe2b1b6..b5c403af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ * `pear`: fix component not exiting with the correct exitcode when PEAR fails. +* `cutadapt`: fix `--par_quality_cutoff_r2` argument. + +* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`. + # biobox 0.1.0 ## BREAKING CHANGES @@ -12,6 +16,7 @@ Viash 0.9.0 in order to avoid issues with the current default separator `:` unintentionally splitting up certain file paths. + ## NEW FEATURES * `arriba`: Detect gene fusions from RNA-seq data (PR #1). diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml index a62f0aa9..b315d0ce 100644 --- a/src/cutadapt/config.vsh.yaml +++ b/src/cutadapt/config.vsh.yaml @@ -240,6 +240,24 @@ argument_groups: Check both the read and its reverse complement for adapter matches. If match is on reverse-complemented version, output that one. + + #################################################################### + - name: "Demultiplexing options" + arguments: + - name: "--demultiplex_mode" + type: string + choices: ["single", "unique_dual", "combinatorial_dual"] + required: false + description: | + Enable demultiplexing and set the mode for it. 
+ With mode 'unique_dual', adapters from the first and second read are used, + and the indexes from the reads are only used in pairs. This implies + --pair_adapters. + Enabling mode 'combinatorial_dual' allows all combinations of the sets of indexes + on R1 and R2. It is necessary to write each read pair to an output + file depending on the adapters found on both R1 and R2. + Mode 'single' uses indexes or barcodes located at the 5' + end of the R1 read (single). #################################################################### - name: Read modifications diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 5e1f9e30..20c92724 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -127,7 +127,7 @@ mod_args=$(echo \ ${par_cut_r2:+--cut_r2 "${par_cut_r2}"} \ ${par_nextseq_trim:+--nextseq-trim "${par_nextseq_trim}"} \ ${par_quality_cutoff:+--quality-cutoff "${par_quality_cutoff}"} \ - ${par_quality_cutoff_r2:+--quality-cutoff_r2 "${par_quality_cutoff_r2}"} \ + ${par_quality_cutoff_r2:+-Q "${par_quality_cutoff_r2}"} \ ${par_quality_base:+--quality-base "${par_quality_base}"} \ ${par_poly_a:+--poly-a} \ ${par_length:+--length "${par_length}"} \ @@ -196,14 +196,35 @@ else ext="fasta" fi -if [ $mode = "se" ]; then +demultiplex_mode="$par_demultiplex_mode" +if [[ $mode == "se" ]]; then + if [[ "$demultiplex_mode" == "unique_dual" ]] || [[ "$demultiplex_mode" == "combinatorial_dual" ]]; then + echo "Demultiplexing dual indexes is not possible with single-end data." + exit 1 + fi + prefix="trimmed_" + if [[ ! 
-z "$demultiplex_mode" ]]; then + prefix="{name}_" + fi output_args=$(echo \ - --output "$output_dir/{name}_001.$ext" \ + --output "$output_dir/${prefix}001.$ext" \ ) else + demultiplex_indicator_r1='{name}_' + demultiplex_indicator_r2=$demultiplex_indicator_r1 + if [[ "$demultiplex_mode" == "combinatorial_dual" ]]; then + demultiplex_indicator_r1='{name1}_{name2}_' + demultiplex_indicator_r2='{name1}_{name2}_' + fi + prefix_r1="trimmed_" + prefix_r2="trimmed_" + if [[ ! -z "$demultiplex_mode" ]]; then + prefix_r1=$demultiplex_indicator_r1 + prefix_r2=$demultiplex_indicator_r2 + fi output_args=$(echo \ - --output "$output_dir/{name}_R1_001.$ext" \ - --paired-output "$output_dir/{name}_R2_001.$ext" \ + --output "$output_dir/${prefix_r1}R1_001.$ext" \ + --paired-output "$output_dir/${prefix_r2}R2_001.$ext" \ ) fi diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh index eff997d7..1d6d9c18 100644 --- a/src/cutadapt/test.sh +++ b/src/cutadapt/test.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e +set -eo pipefail ############################################# # helper functions @@ -57,6 +58,7 @@ EOF --adapter ADAPTER \ --input example.fa \ --fasta \ + --demultiplex_mode single \ --no_match_adapter_wildcards \ --json @@ -101,6 +103,7 @@ EOF --output "out_test1/*.fasta" \ --adapter ADAPTER \ --input example.fa \ + --demultiplex_mode single \ --fasta \ --no_match_adapter_wildcards \ --json @@ -160,6 +163,7 @@ EOF --adapter AAAAA \ --adapter_fasta adapters1.fasta \ --adapter_fasta adapters2.fasta \ + --demultiplex_mode single \ --input example.fa \ --fasta \ --json @@ -224,6 +228,7 @@ EOF --input example_R1.fastq \ --input_r2 example_R2.fastq \ --quality_cutoff 20 \ + --demultiplex_mode unique_dual \ --json \ ---cpus 1 From ec69d9af7a59c2618a49bef7d0bf9afc743ca065 Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Mon, 1 Jul 2024 21:58:03 +0200 Subject: [PATCH 2/2] FEAT: update busco to 5.7.1 (#72) * FEAT: update busco to 5.7.1 
* Typo --- CHANGELOG.md | 4 ++++ src/busco/busco_download_datasets/config.vsh.yaml | 2 +- src/busco/busco_list_datasets/config.vsh.yaml | 2 +- src/busco/busco_run/config.vsh.yaml | 6 +++++- src/busco/busco_run/help.txt | 9 ++++++--- src/busco/busco_run/script.sh | 1 + 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5c403af..3a036fba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ * `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`. +## MINOR CHANGES + +* `busco` components: update BUSCO to `5.7.1`. + # biobox 0.1.0 ## BREAKING CHANGES diff --git a/src/busco/busco_download_datasets/config.vsh.yaml b/src/busco/busco_download_datasets/config.vsh.yaml index 04d76dd6..5297af2e 100644 --- a/src/busco/busco_download_datasets/config.vsh.yaml +++ b/src/busco/busco_download_datasets/config.vsh.yaml @@ -37,7 +37,7 @@ test_resources: path: test.sh engines: - type: docker - image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0 + image: quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0 setup: - type: docker run: | diff --git a/src/busco/busco_list_datasets/config.vsh.yaml b/src/busco/busco_list_datasets/config.vsh.yaml index 6ada7c84..cac34cc6 100644 --- a/src/busco/busco_list_datasets/config.vsh.yaml +++ b/src/busco/busco_list_datasets/config.vsh.yaml @@ -29,7 +29,7 @@ test_resources: path: test.sh engines: - type: docker - image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0 + image: quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0 setup: - type: docker run: | diff --git a/src/busco/busco_run/config.vsh.yaml b/src/busco/busco_run/config.vsh.yaml index d79f03f5..23ee95fb 100644 --- a/src/busco/busco_run/config.vsh.yaml +++ b/src/busco/busco_run/config.vsh.yaml @@ -181,6 +181,10 @@ argument_groups: - name: MetaEuk Settings arguments: + - name: --metaeuk + type: boolean_true + description: | + Use Metaeuk gene predictor. 
- name: --metaeuk_parameters type: string description: | @@ -204,7 +208,7 @@ test_resources: path: test_data engines: - type: docker - image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0 + image: quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0 setup: - type: docker run: | diff --git a/src/busco/busco_run/help.txt b/src/busco/busco_run/help.txt index 2cacec4d..6d83f9be 100644 --- a/src/busco/busco_run/help.txt +++ b/src/busco/busco_run/help.txt @@ -2,7 +2,9 @@ busco -h ``` -Welcome to BUSCO 5.6.1: the Benchmarking Universal Single-Copy Ortholog assessment tool. +usage: busco -i [SEQUENCE_FILE] -l [LINEAGE] -o [OUTPUT_NAME] -m [MODE] [OTHER OPTIONS] + +Welcome to BUSCO 5.7.1: the Benchmarking Universal Single-Copy Ortholog assessment tool. For more detailed usage information, please review the README file provided with this distribution and the BUSCO user guide. Visit this page https://gitlab.com/ezlab/busco#how-to-cite-busco to see how to cite BUSCO optional arguments: @@ -18,7 +20,7 @@ optional arguments: -l LINEAGE, --lineage_dataset LINEAGE Specify the name of the BUSCO lineage to be used. --augustus Use augustus gene predictor for eukaryote runs - --augustus_parameters --PARAM1=VALUE1,--PARAM2=VALUE2 + --augustus_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2" Pass additional arguments to Augustus. All arguments should be contained within a single string with no white space, with each argument separated by a comma. --augustus_species AUGUSTUS_SPECIES Specify a species for Augustus training. 
@@ -42,11 +44,12 @@ optional arguments: --limit N How many candidate regions (contig or transcript) to consider per BUSCO (default: 3) --list-datasets Print the list of available BUSCO datasets --long Optimization Augustus self-training mode (Default: Off); adds considerably to the run time, but can improve results for some non-model organisms + --metaeuk Use Metaeuk gene predictor --metaeuk_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2" Pass additional arguments to Metaeuk for the first run. All arguments should be contained within a single string with no white space, with each argument separated by a comma. --metaeuk_rerun_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2" Pass additional arguments to Metaeuk for the second run. All arguments should be contained within a single string with no white space, with each argument separated by a comma. - --miniprot Use miniprot gene predictor + --miniprot Use Miniprot gene predictor --skip_bbtools Skip BBTools for assembly statistics --offline To indicate that BUSCO cannot attempt to download files --opt-out-run-stats Opt out of data collection. Information on the data collected is available in the user guide. diff --git a/src/busco/busco_run/script.sh b/src/busco/busco_run/script.sh index 5b562f83..a0ef24de 100644 --- a/src/busco/busco_run/script.sh +++ b/src/busco/busco_run/script.sh @@ -39,6 +39,7 @@ busco \ ${par_force:+--force} \ ${par_limit:+--limit "$par_limit"} \ ${par_long:+--long} \ + ${par_metaeuk:+--metaeuk} \ ${par_metaeuk_parameters:+--metaeuk_parameters "$par_metaeuk_parameters"} \ ${par_metaeuk_rerun_parameters:+--metaeuk_rerun_parameters "$par_metaeuk_rerun_parameters"} \ ${par_miniprot:+--miniprot} \