Skip to content

Commit

Permalink
write custom tests for cutadapt
Browse files Browse the repository at this point in the history
  • Loading branch information
rcannood committed Feb 22, 2024
1 parent 6b76604 commit 4601f21
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 73 deletions.
2 changes: 0 additions & 2 deletions src/cutadapt/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,6 @@ functionality:
test_resources:
- type: bash_script
path: test.sh
- type: file
path: test_data
platforms:
- type: docker
image: python:3.12
Expand Down
2 changes: 1 addition & 1 deletion src/cutadapt/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ echo
if [[ -z $par_fasta ]]; then
ext="fastq"
else
ext="fa"
ext="fasta"
fi

if [ $mode = "se" ]; then
Expand Down
192 changes: 150 additions & 42 deletions src/cutadapt/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,83 @@

set -e

dir_in="$meta_resources_dir/test_data"
#############################################
# helper functions
# Assert that a regular file exists at the given path.
# Arguments: $1 - path to check
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 if $1 is a regular file, 1 otherwise (aborts the script
#            when the caller runs under `set -e`)
assert_file_exists() {
  if [ -f "$1" ]; then
    return 0
  fi
  echo "File '$1' does not exist"
  return 1
}
# Assert that no regular file exists at the given path.
# Arguments: $1 - path to check
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 if $1 is not a regular file, 1 if it is
assert_file_doesnt_exist() {
  if [ -f "$1" ]; then
    echo "File '$1' exists but shouldn't"
    return 1
  fi
  return 0
}
# Assert that the given path does not hold any data.
# Note: the check is `-s` negated, so a path that does not exist at all
# also passes; combine with assert_file_exists when existence matters.
# Arguments: $1 - path to check
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 if $1 is missing or zero-sized, 1 if it has content
assert_file_empty() {
  if test -s "$1"; then
    echo "File '$1' is not empty but should be"
    return 1
  fi
  return 0
}
# Assert that the given path exists and has a size greater than zero.
# Arguments: $1 - path to check
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 if $1 has content, 1 if it is missing or zero-sized
assert_file_not_empty() {
  test -s "$1" && return 0
  echo "File '$1' is empty but shouldn't be"
  return 1
}
# Assert that at least one line of a file matches a grep pattern.
# Arguments: $1 - file to search
#            $2 - pattern, interpreted by grep as a basic regular expression
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 if grep finds a match, 1 otherwise (including when grep
#            itself fails, e.g. because the file cannot be read)
assert_file_contains() {
  if ! grep -q "$2" "$1"; then
    echo "File '$1' does not contain '$2'"
    return 1
  fi
  return 0
}
# Assert that no line of a file matches a grep pattern.
# Arguments: $1 - file to search
#            $2 - pattern, interpreted by grep as a basic regular expression
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 if grep finds no match, 1 if the pattern is present
#
# An explicit `if` is used instead of `grep ... && (fail)`: with the
# `&&` form the function's status is grep's non-zero "no match" status
# on the success path, so the assertion could never pass and aborted
# the script under `set -e`.
assert_file_not_contains() {
  if grep -q "$2" "$1"; then
    echo "File '$1' contains '$2' but shouldn't"
    return 1
  fi
  return 0
}

#############################################
mkdir test_simple_single_end
cd test_simple_single_end

echo "#############################################"
echo "> Run cutadapt on single-end data"

cat > example.fa <<'EOF'
>read1
MYSEQUENCEADAPTER
>read2
MYSEQUENCEADAP
>read3
MYSEQUENCEADAPTERSOMETHINGELSE
>read4
MYSEQUENCEADABTER
>read5
MYSEQUENCEADAPTR
>read6
MYSEQUENCEADAPPTER
>read7
ADAPTERMYSEQUENCE
>read8
PTERMYSEQUENCE
>read9
SOMETHINGADAPTERMYSEQUENCE
EOF

"$meta_executable" \
--report minimal \
--output output-dir \
--adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
--input $dir_in/se/a.fastq \
--quality_cutoff 20 \
--output out_test1 \
--adapter ADAPTER \
--input example.fa \
--fasta \
--no_match_adapter_wildcards \
--json

echo ">> Checking output"
[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1
[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1
[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
assert_file_exists "out_test1/report.txt"
assert_file_exists "out_test1/report.json"
assert_file_exists "out_test1/1_001.fasta"
assert_file_exists "out_test1/unknown_001.fasta"

echo ">> Check if output is empty"
[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1
[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1
assert_file_not_empty "out_test1/report.txt"
assert_file_not_empty "out_test1/report.json"
assert_file_not_empty "out_test1/1_001.fasta"
assert_file_not_empty "out_test1/unknown_001.fasta"

echo ">> Check contents"
for i in 1 2 3 7 9; do
assert_file_contains "out_test1/1_001.fasta" ">read$i"
done
for i in 4 5 6 8; do
assert_file_contains "out_test1/unknown_001.fasta" ">read$i"
done

cd ..
echo
Expand All @@ -38,32 +90,58 @@ cd test_multiple_single_end
echo "#############################################"
echo "> Run with a combination of inputs"

echo ">adapter1" > adapters1.fasta
echo "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" >> adapters1.fasta

echo ">adapter1" > adapters2.fasta
echo "TGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" >> adapters2.fasta
cat > example.fa <<'EOF'
>read1
ACGTACGTACGTAAAAA
>read2
ACGTACGTACGTCCCCC
>read3
ACGTACGTACGTGGGGG
>read4
ACGTACGTACGTTTTTT
EOF

cat > adapters1.fasta <<'EOF'
>adapter1
CCCCC
EOF

cat > adapters2.fasta <<'EOF'
>adapter2
GGGGG
EOF

"$meta_executable" \
--report minimal \
--output output-dir \
--adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
--adapter GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
--output out_test2 \
--adapter AAAAA \
--adapter_fasta adapters1.fasta \
--adapter_fasta adapters2.fasta \
--input $dir_in/se/a.fastq \
--quality_cutoff 20 \
--input example.fa \
--fasta \
--json

echo ">> Checking output"
[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1
[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1
[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
assert_file_exists "out_test2/report.txt"
assert_file_exists "out_test2/report.json"
assert_file_exists "out_test2/1_001.fasta"
assert_file_exists "out_test2/adapter1_001.fasta"
assert_file_exists "out_test2/adapter2_001.fasta"
assert_file_exists "out_test2/unknown_001.fasta"

echo ">> Check if output is empty"
[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1
[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1
assert_file_not_empty "out_test2/report.txt"
assert_file_not_empty "out_test2/report.json"
assert_file_not_empty "out_test2/1_001.fasta"
assert_file_not_empty "out_test2/adapter1_001.fasta"
assert_file_not_empty "out_test2/adapter2_001.fasta"
assert_file_not_empty "out_test2/unknown_001.fasta"

echo ">> Check contents"
assert_file_contains "out_test2/1_001.fasta" ">read1"
assert_file_contains "out_test2/adapter1_001.fasta" ">read2"
assert_file_contains "out_test2/adapter2_001.fasta" ">read3"
assert_file_contains "out_test2/unknown_001.fasta" ">read4"

cd ..
echo
Expand All @@ -74,30 +152,60 @@ cd test_simple_paired_end

echo "#############################################"
echo "> Run cutadapt on paired-end data"

cat > example_R1.fastq <<'EOF'
@read1
ACGTACGTACGTAAAAA
+
IIIIIIIIIIIIIIIII
@read2
ACGTACGTACGTCCCCC
+
IIIIIIIIIIIIIIIII
EOF

cat > example_R2.fastq <<'EOF'
@read1
ACGTACGTACGTGGGGG
+
IIIIIIIIIIIIIIIII
@read2
ACGTACGTACGTTTTTT
+
IIIIIIIIIIIIIIIII
EOF

"$meta_executable" \
--report minimal \
--output output-dir \
--adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
--adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCAB \
--input $dir_in/pe/a.1.fastq \
--input_r2 $dir_in/pe/a.2.fastq \
--output out_test3 \
--adapter AAAAA \
--adapter_r2 GGGGG \
--input example_R1.fastq \
--input_r2 example_R2.fastq \
--quality_cutoff 20 \
--json \
---cpus 1

echo ">> Checking output"
[ ! -f "output-dir/report.txt" ] && echo "report.txt does not exist" && exit 1
[ ! -f "output-dir/report.json" ] && echo "report.json does not exist" && exit 1
[ ! -f "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
[ ! -f "output-dir/1_R2_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
[ ! -f "output-dir/unknown_R1_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
[ ! -f "output-dir/unknown_R2_001.fastq" ] && echo "1_R1_001.fastq does not exist" && exit 1
assert_file_exists "out_test3/report.txt"
assert_file_exists "out_test3/report.json"
assert_file_exists "out_test3/1_R1_001.fastq"
assert_file_exists "out_test3/1_R2_001.fastq"
assert_file_exists "out_test3/unknown_R1_001.fastq"
assert_file_exists "out_test3/unknown_R2_001.fastq"

echo ">> Check if output is empty"
[ -s "output-dir/1_R1_001.fastq" ] && echo "1_R1_001.fastq should be empty" && exit 1
[ -s "output-dir/1_R2_001.fastq" ] && echo "1_R2_001.fastq should be empty" && exit 1
[ ! -s "output-dir/unknown_R1_001.fastq" ] && echo "unkown_R1_001.fastq is empty" && exit 1
[ ! -s "output-dir/unknown_R2_001.fastq" ] && echo "unkown_R2_001.fastq is empty" && exit 1
assert_file_not_empty "out_test3/report.txt"
assert_file_not_empty "out_test3/report.json"
assert_file_not_empty "out_test3/1_R1_001.fastq"
assert_file_not_empty "out_test3/1_R2_001.fastq"
assert_file_not_empty "out_test3/unknown_R1_001.fastq"

echo ">> Check contents"
assert_file_contains "out_test3/1_R1_001.fastq" "@read1"
assert_file_contains "out_test3/1_R2_001.fastq" "@read1"
assert_file_contains "out_test3/unknown_R1_001.fastq" "@read2"
assert_file_contains "out_test3/unknown_R2_001.fastq" "@read2"

cd ..
echo
Expand Down
4 changes: 0 additions & 4 deletions src/cutadapt/test_data/pe/a.1.fastq

This file was deleted.

4 changes: 0 additions & 4 deletions src/cutadapt/test_data/pe/a.2.fastq

This file was deleted.

16 changes: 0 additions & 16 deletions src/cutadapt/test_data/script.sh

This file was deleted.

4 changes: 0 additions & 4 deletions src/cutadapt/test_data/se/a.fastq

This file was deleted.

0 comments on commit 4601f21

Please sign in to comment.