From 44d98ba3525f9efa11aef1d4e9e26fbd20567233 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 06:51:23 +0100 Subject: [PATCH 01/10] change multiple_sep to ';' --- _viash.yaml | 7 ++++++- src/cutadapt/script.sh | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/_viash.yaml b/_viash.yaml index c59f8543..0f38a97f 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1 +1,6 @@ -viash_version: 0.8.4 \ No newline at end of file +viash_version: 0.8.4 + +# these config mods will be added by PR #25 +config_mods: | + .functionality.arguments[.multiple == true].multiple_sep := ";" + .functionality.argument_groups[true].arguments[.multiple == true].multiple_sep := ";" \ No newline at end of file diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 2ae29a3c..013a917d 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -30,62 +30,62 @@ fi ########################################################### multi_adapter="" -for adapter in `echo $par_adapter | tr ':' ' '`; do +for adapter in `echo $par_adapter | tr ';' ' '`; do multi_adapter="$multi_adapter --adapter $adapter" done multi_adapter_fasta="" -for adapter_fasta in `echo $par_adapter_fasta | tr ':' ' '`; do +for adapter_fasta in `echo $par_adapter_fasta | tr ';' ' '`; do multi_adapter_fasta="$multi_adapter_fasta --adapter file:$adapter_fasta" done multi_adapter_r2="" -for adapter_r2 in `echo $par_adapter_r2 | tr ':' ' '`; do +for adapter_r2 in `echo $par_adapter_r2 | tr ';' ' '`; do multi_adapter_r2="$multi_adapter_r2 --adapter_r2 $adapter_r2" done multi_adapter_fasta_r2="" -for adapter_fasta_r2 in `echo $par_adapter_fasta_r2 | tr ':' ' '`; do +for adapter_fasta_r2 in `echo $par_adapter_fasta_r2 | tr ';' ' '`; do multi_adapter_fasta_r2="$multi_adapter_fasta_r2 --adapter file:$adapter_fasta_r2" done multi_front="" -for front in `echo $par_front | tr ':' ' '`; do +for front in `echo $par_front | tr ';' ' '`; do multi_front="$multi_front --front $front" done multi_front_fasta="" -for front_fasta in `echo $par_front_fasta | tr ':' ' '`; do +for front_fasta in `echo $par_front_fasta | tr ';' ' '`; do multi_front_fasta="$multi_front_fasta --front file:$front_fasta" done multi_front_r2="" -for front_r2 in `echo $par_front_r2 | tr ':' ' '`; do +for front_r2 in `echo $par_front_r2 | tr ';' ' '`; do multi_front_r2="$multi_front_r2 --front_r2 $front_r2" done multi_front_fasta_r2="" -for front_fasta_r2 in `echo $par_front_fasta_r2 | tr ':' ' '`; do +for front_fasta_r2 in `echo $par_front_fasta_r2 | tr ';' ' '`; do multi_front_fasta_r2="$multi_front_fasta_r2 --front file:$front_fasta_r2" done multi_anywhere="" -for anywhere in `echo $par_anywhere | tr ':' ' '`; do +for anywhere in `echo $par_anywhere | tr ';' ' '`; do multi_anywhere="$multi_anywhere --anywhere $anywhere" done multi_anywhere_fasta="" -for anywhere_fasta in `echo $par_anywhere_fasta | tr ':' ' '`; do +for anywhere_fasta in `echo $par_anywhere_fasta | tr ';' ' '`; do multi_anywhere_fasta="$multi_anywhere_fasta --anywhere file:$anywhere_fasta" done multi_anywhere_r2="" -for anywhere_r2 in `echo $par_anywhere_r2 | tr ':' ' '`; do +for anywhere_r2 in `echo $par_anywhere_r2 | tr ';' ' '`; do multi_anywhere_r2="$multi_anywhere_r2 --anywhere_r2 $anywhere_r2" done multi_anywhere_fasta_r2="" -for anywhere_fasta_r2 in `echo $par_anywhere_fasta_r2 | tr ':' ' '`; do +for anywhere_fasta_r2 in `echo $par_anywhere_fasta_r2 | tr ';' ' '`; do multi_anywhere_fasta_r2="$multi_anywhere_fasta_r2 --anywhere file:$anywhere_fasta_r2" done From 550d026d785812824f0307a17099506505e62013 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 06:52:26 +0100 Subject: [PATCH 02/10] add example --- src/cutadapt/script.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 013a917d..2f7d16bb 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -1,5 +1,15 @@ #!/bin/bash +## VIASH START +par_adapter='AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC;GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC' +par_input='src/cutadapt/test_data/se/a.fastq' +par_report='full' +par_json='false' +par_output='output' +par_fasta='false' +par_info_file='false' +## VIASH END + if [ -z $par_output ]; then par_output=. else From 65b47de17cb0efd5472455ca46d9ececf554ca15 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 06:52:37 +0100 Subject: [PATCH 03/10] simplify code with a helper function --- src/cutadapt/script.sh | 105 +++++++++++++---------------------------- 1 file changed, 33 insertions(+), 72 deletions(-) diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 2f7d16bb..1b5b325e 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -39,82 +39,43 @@ fi # - string and fasta ########################################################### -multi_adapter="" -for adapter in `echo $par_adapter | tr ';' ' '`; do - multi_adapter="$multi_adapter --adapter $adapter" -done - -multi_adapter_fasta="" -for adapter_fasta in `echo $par_adapter_fasta | tr ';' ' '`; do - multi_adapter_fasta="$multi_adapter_fasta --adapter file:$adapter_fasta" -done - -multi_adapter_r2="" -for adapter_r2 in `echo $par_adapter_r2 | tr ';' ' '`; do - multi_adapter_r2="$multi_adapter_r2 --adapter_r2 $adapter_r2" -done - -multi_adapter_fasta_r2="" -for adapter_fasta_r2 in `echo $par_adapter_fasta_r2 | tr ';' ' '`; do - multi_adapter_fasta_r2="$multi_adapter_fasta_r2 --adapter file:$adapter_fasta_r2" -done - -multi_front="" -for front in `echo $par_front | tr ';' ' '`; do - multi_front="$multi_front --front $front" -done - -multi_front_fasta="" -for front_fasta in `echo $par_front_fasta | tr ';' ' '`; do - multi_front_fasta="$multi_front_fasta --front file:$front_fasta" -done - -multi_front_r2="" -for front_r2 in `echo $par_front_r2 | tr ';' ' '`; do - multi_front_r2="$multi_front_r2 --front_r2 $front_r2" -done - -multi_front_fasta_r2="" -for front_fasta_r2 in `echo $par_front_fasta_r2 | tr ';' ' '`; do - multi_front_fasta_r2="$multi_front_fasta_r2 --front file:$front_fasta_r2" -done - -multi_anywhere="" -for anywhere in `echo $par_anywhere | tr ';' ' '`; do - multi_anywhere="$multi_anywhere --anywhere $anywhere" -done - -multi_anywhere_fasta="" -for anywhere_fasta in `echo $par_anywhere_fasta | tr ';' ' '`; do - multi_anywhere_fasta="$multi_anywhere_fasta --anywhere file:$anywhere_fasta" -done - -multi_anywhere_r2="" -for anywhere_r2 in `echo $par_anywhere_r2 | tr ';' ' '`; do - multi_anywhere_r2="$multi_anywhere_r2 --anywhere_r2 $anywhere_r2" -done - -multi_anywhere_fasta_r2="" -for anywhere_fasta_r2 in `echo $par_anywhere_fasta_r2 | tr ';' ' '`; do - multi_anywhere_fasta_r2="$multi_anywhere_fasta_r2 --anywhere file:$anywhere_fasta_r2" -done +function add_flags { + local arg=$1 + local flag=$2 + local prefix=$3 + [[ -z $prefix ]] && prefix="" + + # This function should not be called if the input is empty + # but check for it just in case + if [[ -z $arg ]]; then + return + fi + + local output="" + IFS=';' read -r -a array <<< "$arg" + for a in "${array[@]}"; do + output="$output $flag $prefix$a" + done + echo $output +} echo ">> Parsing arguments dealing with adapters" adapter_args=$(echo \ - ${par_adapter:+${multi_adapter}} \ - ${par_adapter_fasta:+${multi_adapter_fasta}} \ - ${par_front:+${multi_front}} \ - ${par_front_fasta:+${multi_front_fasta}} \ - ${par_anywhere:+${multi_anywhere}} \ - ${par_anywhere_fasta:+${multi_anywhere_fasta}} \ - - ${par_adapter_r2:+${multi_adapter_r2}} \ - ${par_adapter_fasta_r2:+${multi_adapter_fasta_r2}} \ - ${par_front_r2:+${multi_front_r2}} \ - ${par_front_fasta_r2:+${multi_front_fasta_r2}} \ - ${par_anywhere_r2:+${multi_anywhere_r2}} \ - ${par_anywhere_fasta_r2:+${multi_anywhere_fasta_r2}} \ + ${par_adapter:+$(add_flags "$par_adapter" "--adapter")} \ + ${par_adapter_fasta:+$(add_flags "$par_adapter_fasta" "--adapter" "file:")} \ + ${par_front:+$(add_flags "$par_front" "--front")} \ + ${par_front_fasta:+$(add_flags "$par_front_fasta" "--front" "file:")} \ + ${par_anywhere:+$(add_flags "$par_anywhere" "--anywhere")} \ + ${par_anywhere_fasta:+$(add_flags "$par_anywhere_fasta" "--anywhere" "file:")} \ + + ${par_adapter_r2:+$(add_flags "$par_adapter_r2" "--adapter_r2")} \ + ${par_adapter_fasta_r2:+$(add_flags "$par_adapter_fasta_r2" "--adapter_r2" "file:")} \ + ${par_front_r2:+$(add_flags "$par_front_r2" "--front_r2")} \ + ${par_front_fasta_r2:+$(add_flags "$par_front_fasta_r2" "--front_r2" "file:")} \ + ${par_anywhere_r2:+$(add_flags "$par_anywhere_r2" "--anywhere_r2")} \ + ${par_anywhere_fasta_r2:+$(add_flags "$par_anywhere_fasta_r2" "--anywhere_r2" "file:")} \ ) + echo "Arguments to cutadapt:" echo "$adapter_args" echo From 2f187ce1d15947959679fe97b91d0e044030b094 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 06:53:05 +0100 Subject: [PATCH 04/10] create directories in test --- src/cutadapt/test.sh | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh index 14e2e6fe..9059b6f5 100644 --- a/src/cutadapt/test.sh +++ b/src/cutadapt/test.sh @@ -4,6 +4,11 @@ set -e dir_in="$meta_resources_dir/test_data" +############################################# +mkdir test_simple_single_end +cd test_simple_single_end + +echo "#############################################" echo "> Run cutadapt on single-end data" "$meta_executable" \ --report minimal \ @@ -23,8 +28,14 @@ echo ">> Check if output is empty" [ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1 [ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1 -rm -r output-dir +cd .. +echo + +############################################# +mkdir test_multiple_single_end +cd test_multiple_single_end +echo "#############################################" echo "> Run with a combination of inputs" echo ">adapter1" > adapters1.fasta @@ -54,9 +65,14 @@ echo ">> Check if output is empty" [ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1 [ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1 -rm -r output-dir -rm adapters?.fasta +cd .. +echo +############################################# +mkdir test_simple_paired_end +cd test_simple_paired_end + +echo "#############################################" echo "> Run cutadapt on paired-end data" "$meta_executable" \ --report minimal \ @@ -83,7 +99,11 @@ echo ">> Check if output is empty" [ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1 [ ! -s "output-dir/unknown_R2_001.fastq" ] && echo "unkown_R2_001.fastq is empty" && exit 1 -rm -r output-dir +cd .. +echo + +############################################# +echo "#############################################" echo "> Test successful" From 986a0901c65fbda4bbf5b7f30de64c4c153b8bd3 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 09:02:13 +0100 Subject: [PATCH 05/10] use a different output extension if --fasta is provided --- src/cutadapt/script.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 1b5b325e..839761ac 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -178,11 +178,17 @@ echo ">> Output arguments" [[ "$par_fasta" == "false" ]] && unset par_fasta [[ "$par_info_file" == "false" ]] && unset par_info_file +if [[ -z $par_fasta ]]; then + ext="fastq" +else + ext="fa" +fi + if [ $mode = "se" ]; then output_args=$(echo \ ${par_report:+--report "${par_report}"} \ ${par_json:+--json "${par_output}/report.json"} \ - --output "$par_output/{name}_R1_001.fastq" \ + --output "$par_output/{name}_001.$ext" \ ${par_fasta:+--fasta} \ ${par_info_file:+--info-file} \ ) @@ -190,8 +196,8 @@ else output_args=$(echo \ ${par_report:+--report "${par_report}"} \ ${par_json:+--json "${par_output}/report.json"} \ - --output "$par_output/{name}_R1_001.fastq" \ - --paired-output "$par_output/{name}_R2_001.fastq" \ + --output "$par_output/{name}_R1_001.$ext" \ + --paired-output "$par_output/{name}_R2_001.$ext" \ ${par_fasta:+--fasta} \ ${par_info_file:+--info-file} \ ) From 6b76604cb419d3abd80e0c222b96a744831ebb0e Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 09:03:47 +0100 Subject: [PATCH 06/10] decrease code duplication by separating optional outputs from paired/unpaired output arguments --- src/cutadapt/script.sh | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 839761ac..bf7bc5d5 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -10,6 +10,7 @@ par_fasta='false' par_info_file='false' ## VIASH END +# TODO: change this? if [ -z $par_output ]; then par_output=. else @@ -169,15 +170,29 @@ echo "Arguments to cutadapt:" echo $filter_args echo -# Output arguments -# We write the output to a directory rather than -# individual files. +# Optional output arguments ########################################################### -echo ">> Output arguments" +echo ">> Optional arguments" [[ "$par_json" == "false" ]] && unset par_json [[ "$par_fasta" == "false" ]] && unset par_fasta [[ "$par_info_file" == "false" ]] && unset par_info_file +optional_output_args=$(echo \ + ${par_report:+--report "${par_report}"} \ + ${par_json:+--json "${par_output}/report.json"} \ + ${par_fasta:+--fasta} \ + ${par_info_file:+--info-file "$par_output/info.txt"} \ +) + +echo "Arguments to cutadapt:" +echo $optional_output_args +echo + +# Output arguments +# We write the output to a directory rather than +# individual files. +########################################################### + if [[ -z $par_fasta ]]; then ext="fastq" else @@ -186,22 +201,15 @@ fi if [ $mode = "se" ]; then output_args=$(echo \ - ${par_report:+--report "${par_report}"} \ - ${par_json:+--json "${par_output}/report.json"} \ --output "$par_output/{name}_001.$ext" \ - ${par_fasta:+--fasta} \ - ${par_info_file:+--info-file} \ ) else output_args=$(echo \ - ${par_report:+--report "${par_report}"} \ - ${par_json:+--json "${par_output}/report.json"} \ --output "$par_output/{name}_R1_001.$ext" \ --paired-output "$par_output/{name}_R2_001.$ext" \ - ${par_fasta:+--fasta} \ - ${par_info_file:+--info-file} \ ) fi + echo "Arguments to cutadapt:" echo $output_args echo @@ -221,6 +229,7 @@ cli=$(echo \ $input_args \ $mod_args \ $filter_args \ + $optional_output_args \ $output_args \ --cores $par_cpus ) From 4601f21613d121b0272d9466296b6a313271fcca Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 09:34:55 +0100 Subject: [PATCH 07/10] write custom tests for cutadapt --- src/cutadapt/config.vsh.yaml | 2 - src/cutadapt/script.sh | 2 +- src/cutadapt/test.sh | 192 ++++++++++++++++++++++------ src/cutadapt/test_data/pe/a.1.fastq | 4 - src/cutadapt/test_data/pe/a.2.fastq | 4 - src/cutadapt/test_data/script.sh | 16 --- src/cutadapt/test_data/se/a.fastq | 4 - 7 files changed, 151 insertions(+), 73 deletions(-) delete mode 100644 src/cutadapt/test_data/pe/a.1.fastq delete mode 100644 src/cutadapt/test_data/pe/a.2.fastq delete mode 100755 src/cutadapt/test_data/script.sh delete mode 100644 src/cutadapt/test_data/se/a.fastq diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml index 88636952..b9918b88 100644 --- a/src/cutadapt/config.vsh.yaml +++ b/src/cutadapt/config.vsh.yaml @@ -443,8 +443,6 @@ functionality: test_resources: - type: bash_script path: test.sh - - type: file - path: test_data platforms: - type: docker image: python:3.12 diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index bf7bc5d5..0808ddeb 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -196,7 +196,7 @@ echo if [[ -z $par_fasta ]]; then ext="fastq" else - ext="fa" + ext="fasta" fi if [ $mode = "se" ]; then diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh index 9059b6f5..d36e6798 100644 --- a/src/cutadapt/test.sh +++ b/src/cutadapt/test.sh @@ -2,7 +2,26 @@ set -e -dir_in="$meta_resources_dir/test_data" +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1) +} +assert_file_doesnt_exist() { + [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1) +} +assert_file_empty() { + [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1) +} +assert_file_not_empty() { + [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1) +} +assert_file_contains() { + grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1) +} +assert_file_not_contains() { + grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1) +} ############################################# mkdir test_simple_single_end @@ -10,23 +29,56 @@ cd test_simple_single_end echo "#############################################" echo "> Run cutadapt on single-end data" + +cat > example.fa <<'EOF' +>read1 +MYSEQUENCEADAPTER +>read2 +MYSEQUENCEADAP +>read3 +MYSEQUENCEADAPTERSOMETHINGELSE +>read4 +MYSEQUENCEADABTER +>read5 +MYSEQUENCEADAPTR +>read6 +MYSEQUENCEADAPPTER +>read7 +ADAPTERMYSEQUENCE +>read8 +PTERMYSEQUENCE +>read9 +SOMETHINGADAPTERMYSEQUENCE +EOF + "$meta_executable" \ --report minimal \ - --output output-dir \ - --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \ - --input $dir_in/se/a.fastq \ - --quality_cutoff 20 \ + --output out_test1 \ + --adapter ADAPTER \ + --input example.fa \ + --fasta \ + --no_match_adapter_wildcards \ --json echo ">> Checking output" -[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1 -[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1 -[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 -[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 +assert_file_exists "out_test1/report.txt" +assert_file_exists "out_test1/report.json" +assert_file_exists "out_test1/1_001.fasta" +assert_file_exists "out_test1/unknown_001.fasta" echo ">> Check if output is empty" -[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1 -[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1 +assert_file_not_empty "out_test1/report.txt" +assert_file_not_empty "out_test1/report.json" +assert_file_not_empty "out_test1/1_001.fasta" +assert_file_not_empty "out_test1/unknown_001.fasta" + +echo ">> Check contents" +for i in 1 2 3 7 9; do + assert_file_contains "out_test1/1_001.fasta" ">read$i" +done +for i in 4 5 6 8; do + assert_file_contains "out_test1/unknown_001.fasta" ">read$i" +done cd .. echo @@ -38,32 +90,58 @@ cd test_multiple_single_end echo "#############################################" echo "> Run with a combination of inputs" -echo ">adapter1" > adapters1.fasta -echo "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" >> adapters1.fasta - -echo ">adapter1" > adapters2.fasta -echo "TGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" >> adapters2.fasta +cat > example.fa <<'EOF' +>read1 +ACGTACGTACGTAAAAA +>read2 +ACGTACGTACGTCCCCC +>read3 +ACGTACGTACGTGGGGG +>read4 +ACGTACGTACGTTTTTT +EOF + +cat > adapters1.fasta <<'EOF' +>adapter1 +CCCCC +EOF + +cat > adapters2.fasta <<'EOF' +>adapter2 +GGGGG +EOF "$meta_executable" \ --report minimal \ - --output output-dir \ - --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \ - --adapter GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \ + --output out_test2 \ + --adapter AAAAA \ --adapter_fasta adapters1.fasta \ --adapter_fasta adapters2.fasta \ - --input $dir_in/se/a.fastq \ - --quality_cutoff 20 \ + --input example.fa \ + --fasta \ --json echo ">> Checking output" -[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1 -[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1 -[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 -[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 +assert_file_exists "out_test2/report.txt" +assert_file_exists "out_test2/report.json" +assert_file_exists "out_test2/1_001.fasta" +assert_file_exists "out_test2/adapter1_001.fasta" +assert_file_exists "out_test2/adapter2_001.fasta" +assert_file_exists "out_test2/unknown_001.fasta" echo ">> Check if output is empty" -[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1 -[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1 +assert_file_not_empty "out_test2/report.txt" +assert_file_not_empty "out_test2/report.json" +assert_file_not_empty "out_test2/1_001.fasta" +assert_file_not_empty "out_test2/adapter1_001.fasta" +assert_file_not_empty "out_test2/adapter2_001.fasta" +assert_file_not_empty "out_test2/unknown_001.fasta" + +echo ">> Check contents" +assert_file_contains "out_test2/1_001.fasta" ">read1" +assert_file_contains "out_test2/adapter1_001.fasta" ">read2" +assert_file_contains "out_test2/adapter2_001.fasta" ">read3" +assert_file_contains "out_test2/unknown_001.fasta" ">read4" cd .. echo @@ -74,30 +152,60 @@ cd test_simple_paired_end echo "#############################################" echo "> Run cutadapt on paired-end data" + +cat > example_R1.fastq <<'EOF' +@read1 +ACGTACGTACGTAAAAA ++ +IIIIIIIIIIIIIIIII +@read2 +ACGTACGTACGTCCCCC ++ +IIIIIIIIIIIIIIIII +EOF + +cat > example_R2.fastq <<'EOF' +@read1 +ACGTACGTACGTGGGGG ++ +IIIIIIIIIIIIIIIII +@read2 +ACGTACGTACGTTTTTT ++ +IIIIIIIIIIIIIIIII +EOF + "$meta_executable" \ --report minimal \ - --output output-dir \ - --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \ - --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAB \ - --input $dir_in/pe/a.1.fastq \ - --input_r2 $dir_in/pe/a.2.fastq \ + --output out_test3 \ + --adapter AAAAA \ + --adapter_r2 GGGGG \ + --input example_R1.fastq \ + --input_r2 example_R2.fastq \ --quality_cutoff 20 \ --json \ ---cpus 1 echo ">> Checking output" -[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1 -[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1 -[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 -[ ! -f "output-dir/1_R2_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 -[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 -[ ! -f "output-dir/unknown_R2_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1 +assert_file_exists "out_test3/report.txt" +assert_file_exists "out_test3/report.json" +assert_file_exists "out_test3/1_R1_001.fastq" +assert_file_exists "out_test3/1_R2_001.fastq" +assert_file_exists "out_test3/unknown_R1_001.fastq" +assert_file_exists "out_test3/unknown_R2_001.fastq" echo ">> Check if output is empty" -[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1 -[ -s "output-dir/1_R2_001.fastq" ] && echo "1_R2_001.fastq should be empty" && exit 1 -[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1 -[ ! -s "output-dir/unknown_R2_001.fastq" ] && echo "unkown_R2_001.fastq is empty" && exit 1 +assert_file_not_empty "out_test3/report.txt" +assert_file_not_empty "out_test3/report.json" +assert_file_not_empty "out_test3/1_R1_001.fastq" +assert_file_not_empty "out_test3/1_R2_001.fastq" +assert_file_not_empty "out_test3/unknown_R1_001.fastq" + +echo ">> Check contents" +assert_file_contains "out_test3/1_R1_001.fastq" "@read1" +assert_file_contains "out_test3/1_R2_001.fastq" "@read1" +assert_file_contains "out_test3/unknown_R1_001.fastq" "@read2" +assert_file_contains "out_test3/unknown_R2_001.fastq" "@read2" cd .. echo diff --git a/src/cutadapt/test_data/pe/a.1.fastq b/src/cutadapt/test_data/pe/a.1.fastq deleted file mode 100644 index 42735560..00000000 --- a/src/cutadapt/test_data/pe/a.1.fastq +++ /dev/null @@ -1,4 +0,0 @@ -@1 -ACGGCAT -+ -!!!!!!! diff --git a/src/cutadapt/test_data/pe/a.2.fastq b/src/cutadapt/test_data/pe/a.2.fastq deleted file mode 100644 index 42735560..00000000 --- a/src/cutadapt/test_data/pe/a.2.fastq +++ /dev/null @@ -1,4 +0,0 @@ -@1 -ACGGCAT -+ -!!!!!!! diff --git a/src/cutadapt/test_data/script.sh b/src/cutadapt/test_data/script.sh deleted file mode 100755 index 3251b59c..00000000 --- a/src/cutadapt/test_data/script.sh +++ /dev/null @@ -1,16 +0,0 @@ -# cutadapt test data - -# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/cutadapt/test - -if [ ! -d /tmp/snakemake-wrappers ]; then - git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers -fi - -mkdir -p src/cutadapt/test_data/pe -mkdir src/cutadapt/test_data/se - -cp -r /tmp/snakemake-wrappers/bio/cutadapt/se/test/reads/* src/cutadapt/test_data/se -cp -r /tmp/snakemake-wrappers/bio/cutadapt/pe/test/reads/* src/cutadapt/test_data/pe - -rm -rf /tmp/snakemake-wrappers - diff --git a/src/cutadapt/test_data/se/a.fastq b/src/cutadapt/test_data/se/a.fastq deleted file mode 100644 index 42735560..00000000 --- a/src/cutadapt/test_data/se/a.fastq +++ /dev/null @@ -1,4 +0,0 @@ -@1 -ACGGCAT -+ -!!!!!!! From 45d1989c80a106cf9494651ec3f8f8c1be182cfa Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 10:21:44 +0100 Subject: [PATCH 08/10] fix _r2 arguments --- src/cutadapt/script.sh | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 0808ddeb..2d15843f 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -68,13 +68,12 @@ adapter_args=$(echo \ ${par_front_fasta:+$(add_flags "$par_front_fasta" "--front" "file:")} \ ${par_anywhere:+$(add_flags "$par_anywhere" "--anywhere")} \ ${par_anywhere_fasta:+$(add_flags "$par_anywhere_fasta" "--anywhere" "file:")} \ - - ${par_adapter_r2:+$(add_flags "$par_adapter_r2" "--adapter_r2")} \ - ${par_adapter_fasta_r2:+$(add_flags "$par_adapter_fasta_r2" "--adapter_r2" "file:")} \ - ${par_front_r2:+$(add_flags "$par_front_r2" "--front_r2")} \ - ${par_front_fasta_r2:+$(add_flags "$par_front_fasta_r2" "--front_r2" "file:")} \ - ${par_anywhere_r2:+$(add_flags "$par_anywhere_r2" "--anywhere_r2")} \ - ${par_anywhere_fasta_r2:+$(add_flags "$par_anywhere_fasta_r2" "--anywhere_r2" "file:")} \ + ${par_adapter_r2:+$(add_flags "$par_adapter_r2" "-A")} \ + ${par_adapter_fasta_r2:+$(add_flags "$par_adapter_fasta_r2" "-A" "file:")} \ + ${par_front_r2:+$(add_flags "$par_front_r2" "-G")} \ + ${par_front_fasta_r2:+$(add_flags "$par_front_fasta_r2" "-G" "file:")} \ + ${par_anywhere_r2:+$(add_flags "$par_anywhere_r2" "-B")} \ + ${par_anywhere_fasta_r2:+$(add_flags "$par_anywhere_fasta_r2" "-B" "file:")} \ ) echo "Arguments to cutadapt:" From 22f99c6e95283be10319baeee31bc19fa2f4c843 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 22 Feb 2024 10:22:04 +0100 Subject: [PATCH 09/10] add debug flag as not to always print the cli command --- src/cutadapt/config.vsh.yaml | 5 ++++ src/cutadapt/script.sh | 58 +++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml index b9918b88..ebd56e3b 100644 --- a/src/cutadapt/config.vsh.yaml +++ b/src/cutadapt/config.vsh.yaml @@ -437,6 +437,11 @@ functionality: # - name: --untrimmed_paired_output # - name: too_short_paired_output # - name: too_long_paired_output + - name: Debug + arguments: + - type: boolean_true + name: --debug + description: Print debug information resources: - type: bash_script path: script.sh diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 2d15843f..1edfb090 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -8,6 +8,7 @@ par_json='false' par_output='output' par_fasta='false' par_info_file='false' +par_debug='true' ## VIASH END # TODO: change this? @@ -17,11 +18,13 @@ else mkdir -p "$par_output" fi +function debug { + [[ "$par_debug" == "true" ]] && echo "DEBUG: $@" +} # Init ########################################################### -echo "Running cutadapt" -echo + echo ">> Paired-end data or not?" mode="" @@ -60,7 +63,7 @@ function add_flags { echo $output } -echo ">> Parsing arguments dealing with adapters" +debug ">> Parsing arguments dealing with adapters" adapter_args=$(echo \ ${par_adapter:+$(add_flags "$par_adapter" "--adapter")} \ ${par_adapter_fasta:+$(add_flags "$par_adapter_fasta" "--adapter" "file:")} \ @@ -76,9 +79,9 @@ adapter_args=$(echo \ ${par_anywhere_fasta_r2:+$(add_flags "$par_anywhere_fasta_r2" "-B" "file:")} \ ) -echo "Arguments to cutadapt:" -echo "$adapter_args" -echo +debug "Arguments to cutadapt:" +debug "$adapter_args" +debug # Paired-end options ########################################################### @@ -91,9 +94,9 @@ paired_args=$(echo \ ${par_pair_filter:+--pair-filter "${par_pair_filter}"} \ ${par_interleaved:+--interleaved} ) -echo "Arguments to cutadapt:" -echo $paired_args -echo +debug "Arguments to cutadapt:" +debug $paired_args +debug # Input arguments ########################################################### @@ -113,9 +116,9 @@ input_args=$(echo \ ${par_action:+--action "${par_action}"} \ ${par_revcomp:+--revcomp} \ ) -echo "Arguments to cutadapt:" -echo $input_args -echo +debug "Arguments to cutadapt:" +debug $input_args +debug # Read modifications ########################################################### @@ -141,9 +144,9 @@ mod_args=$(echo \ ${par_rename:+--rename "${par_rename}"} \ ${par_zero_cap:+--zero-cap} \ ) -echo "Arguments to cutadapt:" -echo $mod_args -echo +debug "Arguments to cutadapt:" +debug $mod_args +debug # Filtering of processed reads arguments ########################################################### @@ -165,9 +168,9 @@ filter_args=$(echo \ ${par_discard_untrimmed:+--discard-untrimmed} \ ${par_discard_casava:+--discard-casava} \ ) -echo "Arguments to cutadapt:" -echo $filter_args -echo +debug "Arguments to cutadapt:" +debug $filter_args +debug # Optional output arguments ########################################################### @@ -183,9 +186,9 @@ optional_output_args=$(echo \ ${par_info_file:+--info-file "$par_output/info.txt"} \ ) -echo "Arguments to cutadapt:" -echo $optional_output_args -echo +debug "Arguments to cutadapt:" +debug $optional_output_args +debug # Output arguments # We write the output to a directory rather than @@ -209,15 +212,14 @@ else ) fi -echo "Arguments to cutadapt:" -echo $output_args -echo +debug "Arguments to cutadapt:" +debug $output_args +debug # Full CLI # Set the --cores argument to 0 unless meta_cpus is set ########################################################### -echo ">> Full CLI to be run:" - +echo ">> Running cutadapt" par_cpus=0 [[ ! -z $meta_cpus ]] && par_cpus=$meta_cpus @@ -233,6 +235,8 @@ cli=$(echo \ --cores $par_cpus ) -echo cutadapt $cli | sed -e 's/--/\r\n --/g' +debug ">> Full CLI to be run:" +debug cutadapt $cli | sed -e 's/--/\r\n --/g' +debug cutadapt $cli | tee $par_output/report.txt From 8370251ab0f46ce846decc9d8d101f8f00165a07 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Sat, 24 Feb 2024 22:24:42 +0100 Subject: [PATCH 10/10] remove comment --- _viash.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/_viash.yaml b/_viash.yaml index f13febba..65344505 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,6 +1,5 @@ viash_version: 0.8.5 -# these config mods will be added by PR #25 config_mods: | .functionality.arguments[.multiple == true].multiple_sep := ";" .functionality.argument_groups[true].arguments[.multiple == true].multiple_sep := ";" \ No newline at end of file