From 4601f21613d121b0272d9466296b6a313271fcca Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Thu, 22 Feb 2024 09:34:55 +0100
Subject: [PATCH] write custom tests for cutadapt

---
Review note (not part of the commit message): assert_file_not_contains is
written as "! grep -q ... || (echo ... && exit 1)". In the form
"grep -q ... && (...)" the helper returns grep's non-zero status whenever the
pattern is absent, so under "set -e" the test script aborts exactly when the
assertion holds. The "index" line of test.sh still shows the blob ids of the
patch before this one-line correction.

 src/cutadapt/config.vsh.yaml        |   2 -
 src/cutadapt/script.sh              |   2 +-
 src/cutadapt/test.sh                | 192 ++++++++++++++++++++++------
 src/cutadapt/test_data/pe/a.1.fastq |   4 -
 src/cutadapt/test_data/pe/a.2.fastq |   4 -
 src/cutadapt/test_data/script.sh    |  16 ---
 src/cutadapt/test_data/se/a.fastq   |   4 -
 7 files changed, 151 insertions(+), 73 deletions(-)
 delete mode 100644 src/cutadapt/test_data/pe/a.1.fastq
 delete mode 100644 src/cutadapt/test_data/pe/a.2.fastq
 delete mode 100755 src/cutadapt/test_data/script.sh
 delete mode 100644 src/cutadapt/test_data/se/a.fastq

diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml
index 88636952..b9918b88 100644
--- a/src/cutadapt/config.vsh.yaml
+++ b/src/cutadapt/config.vsh.yaml
@@ -443,8 +443,6 @@ functionality:
   test_resources:
     - type: bash_script
       path: test.sh
-    - type: file
-      path: test_data
 platforms:
   - type: docker
     image: python:3.12
diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh
index bf7bc5d5..0808ddeb 100644
--- a/src/cutadapt/script.sh
+++ b/src/cutadapt/script.sh
@@ -196,7 +196,7 @@ echo
 if [[ -z $par_fasta ]]; then
   ext="fastq"
 else
-  ext="fa"
+  ext="fasta"
 fi
 
 if [ $mode = "se" ]; then
diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh
index 9059b6f5..d36e6798 100644
--- a/src/cutadapt/test.sh
+++ b/src/cutadapt/test.sh
@@ -2,7 +2,26 @@
 
 set -e
 
-dir_in="$meta_resources_dir/test_data"
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1)
+}
+assert_file_doesnt_exist() {
+  [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1)
+}
+assert_file_empty() {
+  [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1)
+}
+assert_file_not_empty() {
+  [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
+}
+assert_file_contains() {
+  grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+}
+assert_file_not_contains() {
+  ! grep -q "$2" "$1" || (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+}
 
 #############################################
 mkdir test_simple_single_end
@@ -10,23 +29,56 @@ cd test_simple_single_end
 
 echo "#############################################"
 echo "> Run cutadapt on single-end data"
+
+cat > example.fa <<'EOF'
+>read1
+MYSEQUENCEADAPTER
+>read2
+MYSEQUENCEADAP
+>read3
+MYSEQUENCEADAPTERSOMETHINGELSE
+>read4
+MYSEQUENCEADABTER
+>read5
+MYSEQUENCEADAPTR
+>read6
+MYSEQUENCEADAPPTER
+>read7
+ADAPTERMYSEQUENCE
+>read8
+PTERMYSEQUENCE
+>read9
+SOMETHINGADAPTERMYSEQUENCE
+EOF
+
 "$meta_executable" \
   --report minimal \
-  --output output-dir \
-  --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
-  --input $dir_in/se/a.fastq \
-  --quality_cutoff 20 \
+  --output out_test1 \
+  --adapter ADAPTER \
+  --input example.fa \
+  --fasta \
+  --no_match_adapter_wildcards \
   --json
 
 echo ">> Checking output"
-[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1
-[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1
-[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
-[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
+assert_file_exists "out_test1/report.txt"
+assert_file_exists "out_test1/report.json"
+assert_file_exists "out_test1/1_001.fasta"
+assert_file_exists "out_test1/unknown_001.fasta"
 
 echo ">> Check if output is empty"
-[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1
-[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1
+assert_file_not_empty "out_test1/report.txt"
+assert_file_not_empty "out_test1/report.json"
+assert_file_not_empty "out_test1/1_001.fasta"
+assert_file_not_empty "out_test1/unknown_001.fasta"
+
+echo ">> Check contents"
+for i in 1 2 3 7 9; do
+  assert_file_contains "out_test1/1_001.fasta" ">read$i"
+done
+for i in 4 5 6 8; do
+  assert_file_contains "out_test1/unknown_001.fasta" ">read$i"
+done
 
 cd ..
 echo
@@ -38,32 +90,58 @@ cd test_multiple_single_end
 echo "#############################################"
 echo "> Run with a combination of inputs"
 
-echo ">adapter1" > adapters1.fasta
-echo "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" >> adapters1.fasta
-
-echo ">adapter1" > adapters2.fasta
-echo "TGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" >> adapters2.fasta
+cat > example.fa <<'EOF'
+>read1
+ACGTACGTACGTAAAAA
+>read2
+ACGTACGTACGTCCCCC
+>read3
+ACGTACGTACGTGGGGG
+>read4
+ACGTACGTACGTTTTTT
+EOF
+
+cat > adapters1.fasta <<'EOF'
+>adapter1
+CCCCC
+EOF
+
+cat > adapters2.fasta <<'EOF'
+>adapter2
+GGGGG
+EOF
 
 "$meta_executable" \
   --report minimal \
-  --output output-dir \
-  --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
-  --adapter GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
+  --output out_test2 \
+  --adapter AAAAA \
   --adapter_fasta adapters1.fasta \
   --adapter_fasta adapters2.fasta \
-  --input $dir_in/se/a.fastq \
-  --quality_cutoff 20 \
+  --input example.fa \
+  --fasta \
   --json
 
 echo ">> Checking output"
-[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1
-[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1
-[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
-[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
+assert_file_exists "out_test2/report.txt"
+assert_file_exists "out_test2/report.json"
+assert_file_exists "out_test2/1_001.fasta"
+assert_file_exists "out_test2/adapter1_001.fasta"
+assert_file_exists "out_test2/adapter2_001.fasta"
+assert_file_exists "out_test2/unknown_001.fasta"
 
 echo ">> Check if output is empty"
-[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1
-[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1
+assert_file_not_empty "out_test2/report.txt"
+assert_file_not_empty "out_test2/report.json"
+assert_file_not_empty "out_test2/1_001.fasta"
+assert_file_not_empty "out_test2/adapter1_001.fasta"
+assert_file_not_empty "out_test2/adapter2_001.fasta"
+assert_file_not_empty "out_test2/unknown_001.fasta"
+
+echo ">> Check contents"
+assert_file_contains "out_test2/1_001.fasta" ">read1"
+assert_file_contains "out_test2/adapter1_001.fasta" ">read2"
+assert_file_contains "out_test2/adapter2_001.fasta" ">read3"
+assert_file_contains "out_test2/unknown_001.fasta" ">read4"
 
 cd ..
 echo
@@ -74,30 +152,60 @@ cd test_simple_paired_end
 
 echo "#############################################"
 echo "> Run cutadapt on paired-end data"
+
+cat > example_R1.fastq <<'EOF'
+@read1
+ACGTACGTACGTAAAAA
++
+IIIIIIIIIIIIIIIII
+@read2
+ACGTACGTACGTCCCCC
++
+IIIIIIIIIIIIIIIII
+EOF
+
+cat > example_R2.fastq <<'EOF'
+@read1
+ACGTACGTACGTGGGGG
++
+IIIIIIIIIIIIIIIII
+@read2
+ACGTACGTACGTTTTTT
++
+IIIIIIIIIIIIIIIII
+EOF
+
 "$meta_executable" \
   --report minimal \
-  --output output-dir \
-  --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
-  --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAB \
-  --input $dir_in/pe/a.1.fastq \
-  --input_r2 $dir_in/pe/a.2.fastq \
+  --output out_test3 \
+  --adapter AAAAA \
+  --adapter_r2 GGGGG \
+  --input example_R1.fastq \
+  --input_r2 example_R2.fastq \
   --quality_cutoff 20 \
   --json \
   ---cpus 1
 
 echo ">> Checking output"
-[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1
-[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1
-[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
-[ ! -f "output-dir/1_R2_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
-[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
-[ ! -f "output-dir/unknown_R2_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
+assert_file_exists "out_test3/report.txt"
+assert_file_exists "out_test3/report.json"
+assert_file_exists "out_test3/1_R1_001.fastq"
+assert_file_exists "out_test3/1_R2_001.fastq"
+assert_file_exists "out_test3/unknown_R1_001.fastq"
+assert_file_exists "out_test3/unknown_R2_001.fastq"
 
 echo ">> Check if output is empty"
-[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1
-[ -s "output-dir/1_R2_001.fastq" ] && echo "1_R2_001.fastq should be empty" && exit 1
-[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1
-[ ! -s "output-dir/unknown_R2_001.fastq" ] && echo "unkown_R2_001.fastq is empty" && exit 1
+assert_file_not_empty "out_test3/report.txt"
+assert_file_not_empty "out_test3/report.json"
+assert_file_not_empty "out_test3/1_R1_001.fastq"
+assert_file_not_empty "out_test3/1_R2_001.fastq"
+assert_file_not_empty "out_test3/unknown_R1_001.fastq"
+
+echo ">> Check contents"
+assert_file_contains "out_test3/1_R1_001.fastq" "@read1"
+assert_file_contains "out_test3/1_R2_001.fastq" "@read1"
+assert_file_contains "out_test3/unknown_R1_001.fastq" "@read2"
+assert_file_contains "out_test3/unknown_R2_001.fastq" "@read2"
 
 cd ..
 echo
diff --git a/src/cutadapt/test_data/pe/a.1.fastq b/src/cutadapt/test_data/pe/a.1.fastq
deleted file mode 100644
index 42735560..00000000
--- a/src/cutadapt/test_data/pe/a.1.fastq
+++ /dev/null
@@ -1,4 +0,0 @@
-@1
-ACGGCAT
-+
-!!!!!!!
diff --git a/src/cutadapt/test_data/pe/a.2.fastq b/src/cutadapt/test_data/pe/a.2.fastq
deleted file mode 100644
index 42735560..00000000
--- a/src/cutadapt/test_data/pe/a.2.fastq
+++ /dev/null
@@ -1,4 +0,0 @@
-@1
-ACGGCAT
-+
-!!!!!!!
diff --git a/src/cutadapt/test_data/script.sh b/src/cutadapt/test_data/script.sh
deleted file mode 100755
index 3251b59c..00000000
--- a/src/cutadapt/test_data/script.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-# cutadapt test data
-
-# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/cutadapt/test
-
-if [ ! -d /tmp/snakemake-wrappers ]; then
-  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
-fi
-
-mkdir -p src/cutadapt/test_data/pe
-mkdir src/cutadapt/test_data/se
-
-cp -r /tmp/snakemake-wrappers/bio/cutadapt/se/test/reads/* src/cutadapt/test_data/se
-cp -r /tmp/snakemake-wrappers/bio/cutadapt/pe/test/reads/* src/cutadapt/test_data/pe
-
-rm -rf /tmp/snakemake-wrappers
-
diff --git a/src/cutadapt/test_data/se/a.fastq b/src/cutadapt/test_data/se/a.fastq
deleted file mode 100644
index 42735560..00000000
--- a/src/cutadapt/test_data/se/a.fastq
+++ /dev/null
@@ -1,4 +0,0 @@
-@1
-ACGGCAT
-+
-!!!!!!!