Skip to content

Commit

Permalink
Added more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
tgaspe committed Jul 23, 2024
1 parent 03a32c5 commit ffcb97e
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 66 deletions.
1 change: 0 additions & 1 deletion src/fastqc/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ argument_groups:
- name: --zip
type: file
direction: output
multiple: true
multiple_sep: ","
description: |
Create the zip file(s) containing: html report, data, images, icons etc.
Expand Down
73 changes: 38 additions & 35 deletions src/fastqc/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,41 +36,44 @@ fastqc \
${par_dir:+--dir "$par_dir"} \
${par_input:+ ${input[*]}}

# input_dir=$(dirname ${input[1]})
# # Both outputs args passed
# if [[ -n "$par_html" ]] && [[ -n "$par_zip" ]]; then
# IFS=',' read -r -a html_files <<< "$par_html"
# IFS=',' read -r -a zip_files <<< "$par_zip"
# for i in "${!input[@]}"; do
# sample_name=$(basename ${input[$i]} .fq)
# input_zip="$input_dir/${sample_name}_fastqc.zip"
# input_html="$input_dir/${sample_name}_fastqc.html"
# zip_file=${zip_files[$i]}
# html_file=${html_files[$i]}
# mv "$input_zip" "$zip_file"
# mv "$input_html" "$html_file"
# done
# # Only html output arg passed
# elif [[ -n "$par_html" ]]; then
# IFS=',' read -r -a html_files <<< "$par_html"
# for i in "${!input[@]}"; do
# sample_name=$(basename ${input[$i]} .fq)
# input_html="$input_dir/${sample_name}_fastqc.html"
# html_file=${html_files[$i]}
# mv "$input_html" "$html_file"
# done
# rm "$input_dir"/*.zip
# # Only zip output arg passed
# elif [[ -n "$par_zip" ]]; then
# IFS=',' read -r -a zip_files <<< "$par_zip"
# for i in "${!input[@]}"; do
# sample_name=$(basename ${input[$i]} .fq)
# input_zip="$input_dir/${sample_name}_fastqc.zip"
# zip_file=${zip_files[$i]}
# mv "$input_zip" "$zip_file"
# done
# rm "$input_dir"/*.html
# fi
# Derive the stem FastQC uses when naming its reports
# (<stem>_fastqc.html / <stem>_fastqc.zip) from an input path.
# Arguments: $1 - path to an input file
# Outputs:   the stem, on stdout
# NOTE(review): the suffix list is meant to mirror the extensions FastQC
# strips from input names (test.sh expects example.sam -> example_fastqc.html);
# confirm it against the FastQC version shipped with this component.
fastqc_sample_name() {
  local stem ext
  stem=$(basename -- "$1")
  for ext in .gz .bz2 .txt .fastq .fq .csfastq .sam .bam .ubam; do
    stem=${stem%"$ext"}
  done
  printf '%s\n' "$stem"
}

# Reports are looked up next to the first input file.
# NOTE(review): assumes all inputs share one directory and that fastqc wrote
# its reports there — confirm no output directory is passed to fastqc above.
input_dir=$(dirname -- "${input[0]}")

# Only touch the generated reports when at least one explicit output path was
# requested; with neither --html nor --zip they stay where fastqc wrote them.
if [[ -n "${par_html:-}" || -n "${par_zip:-}" ]]; then
  html_files=()
  zip_files=()
  if [[ -n "${par_html:-}" ]]; then
    IFS=',' read -r -a html_files <<< "$par_html"
  fi
  if [[ -n "${par_zip:-}" ]]; then
    IFS=',' read -r -a zip_files <<< "$par_zip"
  fi

  # The i-th requested output path receives the report of the i-th input.
  for i in "${!input[@]}"; do
    sample_name=$(fastqc_sample_name "${input[$i]}")
    input_zip="$input_dir/${sample_name}_fastqc.zip"
    input_html="$input_dir/${sample_name}_fastqc.html"

    if [[ -n "${par_zip:-}" ]]; then
      mv -- "$input_zip" "${zip_files[$i]}"
    else
      # Zip not requested: drop only the report generated for this input,
      # never other *.zip files that happen to live in the input directory.
      rm -f -- "$input_zip"
    fi

    if [[ -n "${par_html:-}" ]]; then
      mv -- "$input_html" "${html_files[$i]}"
    else
      # Same for the HTML report when only --zip was requested.
      rm -f -- "$input_html"
    fi
  done
fi


# Questions:
Expand Down
211 changes: 181 additions & 30 deletions src/fastqc/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ CACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGANNNNNNNNNNANNNCGAGGCCCTGGGGTAGAGGGNN
@?@DDDDDDHHH?GH:?FCBGGB@C?DBEGIIIIAEF;FCGGI#########################################################
EOL

# Fixture: contaminant list, one "<name><TAB><sequence>" entry per line.
printf '%s\t%s\n' \
  'contaminant_sequence1' 'CACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGA' \
  'contaminant_sequence2' 'GATCTTGG' \
  > "test_data/contaminants.txt"

# Fixture: small SAM file with four header lines and three alignment records.
# The final record is written without a trailing newline.
{
  printf "@HD\tVN:1.0\tSO:unsorted\n"
  printf "@SQ\tSN:chr1\tLN:248956422\n"
  printf "@SQ\tSN:chr2\tLN:242193529\n"
  printf "@PG\tID:bowtie2\tPN:bowtie2\tVN:2.3.4.1\tCL:\"/usr/bin/bowtie2-align-s --wrapper basic-0 -x genome -U reads.fq -S output.sam\"\n"
  printf "read1\t0\tchr1\t100\t255\t50M\t*\t0\t0\tACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-10\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU\n"
  printf "read2\t0\tchr2\t150\t255\t50M\t*\t0\t0\tTGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-8\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU\n"
  printf "read3\t16\tchr1\t200\t255\t50M\t*\t0\t0\tGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU"
} > "test_data/example.sam"

cat > "test_data/expected_summary.txt" <<EOL
PASS Basic Statistics input_1.fq
PASS Per base sequence quality input_1.fq
Expand All @@ -53,79 +66,217 @@ FAIL Overrepresented sequences input_2.fq
PASS Adapter Content input_2.fq
EOL

cat > "test_data/expected_summary_sam.txt" <<EOL
PASS Basic Statistics example.sam
PASS Per base sequence quality example.sam
FAIL Per sequence quality scores example.sam
FAIL Per base sequence content example.sam
WARN Per sequence GC content example.sam
PASS Per base N content example.sam
WARN Sequence Length Distribution example.sam
PASS Sequence Duplication Levels example.sam
FAIL Overrepresented sequences example.sam
PASS Adapter Content example.sam
EOL

# Test 1: Run fastqc with default parameters
mkdir test1
cd test1
echo "-> Run Test without options"
echo "-> Run Test: one input"
"$meta_executable" \
--input "../test_data/input_1.fq"
--extract \
--input "test_data/input_1.fq"

# Check if the html file was generated
[ ! -f "../test_data/input_1_fastqc.html" ] \
[ ! -f "test_data/input_1_fastqc.html" ] \
&& echo "Output HTML file not found." && exit 1

# Check if the zip file was generated
[ ! -f "../test_data/input_1_fastqc.zip" ] \
[ ! -f "test_data/input_1_fastqc.zip" ] \
&& echo "Output ZIP file not found." && exit 1

# Check if the files are empty
[ ! -s "../test_data/input_1_fastqc.html" ] \
[ ! -s "test_data/input_1_fastqc.html" ] \
&& echo "Output HTML file is empty." && exit 1

[ ! -s "../test_data/input_1_fastqc.zip" ] \
[ ! -s "test_data/input_1_fastqc.zip" ] \
&& echo "Output ZIP file is empty." && exit 1

# Unzip the zip file
unzip "../test_data/input_1_fastqc.zip" -d "../test_data/test1/"

# Check if the summary.txt was extracted
[ ! -f "../test_data/test1/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1
[ ! -f "test_data/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1

# Check if the summary.txt is correct
diff -a "../test_data/expected_summary.txt" "../test_data/test1/input_1_fastqc/summary.txt" \
diff -a "test_data/expected_summary.txt" "test_data/input_1_fastqc/summary.txt" \
|| (echo "Output summary file does not match expected output" && exit 1)

rm -r "test_data/input_1_fastqc"
rm "test_data/input_1_fastqc.html"
rm "test_data/input_1_fastqc.zip"

echo "- test succeeded -"
cd ..

# Test 2: Run fastqc with multiple inputs
mkdir test2
cd test2
echo "-> Run Test with multiple inputs"
echo "-> Run Test: two inputs"
"$meta_executable" \
--input "../test_data/input_1.fq,../test_data/input_2.fq"
--extract \
--input "test_data/input_1.fq,test_data/input_2.fq"

# Check if the html files were generated
[ ! -f "../test_data/input_1_fastqc.html" ] && [ ! -f "../test_data/input_2_fastqc.html" ] \
# Either report missing is a failure (not only both missing).
if [ ! -f "test_data/input_1_fastqc.html" ] || [ ! -f "test_data/input_2_fastqc.html" ]; then
  echo "Output HTML files not found."
  exit 1
fi

# Check if the zip files were generated
[ ! -f "../test_data/input_1_fastqc.zip" ] && [ ! -f "../test_data/input_2_fastqc.zip" ] \
# Either archive missing is a failure (not only both missing).
if [ ! -f "test_data/input_1_fastqc.zip" ] || [ ! -f "test_data/input_2_fastqc.zip" ]; then
  echo "Output ZIP files not found."
  exit 1
fi

# Check if the files are empty
[ ! -s "../test_data/input_1_fastqc.html" ] && [ ! -s "../test_data/input_2_fastqc.html" ] \
# Either report being empty is a failure (not only both empty).
if [ ! -s "test_data/input_1_fastqc.html" ] || [ ! -s "test_data/input_2_fastqc.html" ]; then
  echo "Output HTML files are empty."
  exit 1
fi

[ ! -s "../test_data/input_1_fastqc.zip" ] && [ ! -s "../test_data/input_2_fastqc.zip" ] \
# Either archive being empty is a failure (not only both empty).
if [ ! -s "test_data/input_1_fastqc.zip" ] || [ ! -s "test_data/input_2_fastqc.zip" ]; then
  echo "Output ZIP files are empty."
  exit 1
fi

# Unzip the zip files
unzip "../test_data/input_1_fastqc.zip" -d "../test_data/test2/"
unzip "../test_data/input_2_fastqc.zip" -d "../test_data/test2/"
# Check if the summary.txt was extracted for both inputs
if [ ! -f "test_data/input_1_fastqc/summary.txt" ]; then
  echo "Extracted files not found."
  exit 1
fi
if [ ! -f "test_data/input_2_fastqc/summary.txt" ]; then
  echo "Extracted files not found."
  exit 1
fi

# Check if the summary.txt is correct.
# A brace group is used so that `exit 1` terminates the test script itself;
# inside parentheses it would only leave a subshell.
diff -a "test_data/expected_summary.txt" "test_data/input_1_fastqc/summary.txt" \
  || { echo "Output summary file does not match expected output"; exit 1; }
diff -a "test_data/expected_summary2.txt" "test_data/input_2_fastqc/summary.txt" \
  || { echo "Output summary file does not match expected output"; exit 1; }

# Remove everything this test produced so later tests start clean
rm -r "test_data/input_1_fastqc"
rm -r "test_data/input_2_fastqc"
rm "test_data/input_1_fastqc.html"
rm "test_data/input_2_fastqc.html"
rm "test_data/input_1_fastqc.zip"
rm "test_data/input_2_fastqc.zip"

echo "- test succeeded -"

# Test 3: Run fastqc with contaminants
# Runs the component on input_1.fq with a custom contaminants list and checks
# that the reports exist, are non-empty, and that fastqc_data.txt reports the
# expected contaminant hit.
echo "-> Run Test: contaminants"
"$meta_executable" \
  --extract \
  --input "test_data/input_1.fq" \
  --contaminants "test_data/contaminants.txt"

# Check if the html file was generated
if [ ! -f "test_data/input_1_fastqc.html" ]; then
  echo "Output HTML file not found."
  exit 1
fi

# Check if the zip file was generated
if [ ! -f "test_data/input_1_fastqc.zip" ]; then
  echo "Output ZIP file not found."
  exit 1
fi

# Check if the files are empty
if [ ! -s "test_data/input_1_fastqc.html" ]; then
  echo "Output HTML file is empty."
  exit 1
fi

if [ ! -s "test_data/input_1_fastqc.zip" ]; then
  echo "Output ZIP file is empty."
  exit 1
fi

# Check if the summary.txt was extracted
if [ ! -f "test_data/input_1_fastqc/summary.txt" ]; then
  echo "Extracted files not found."
  exit 1
fi

# Checking for contaminants in fastqc_data.txt
echo "Checking for contaminants in fastqc_data.txt"
# `|| true`: grep exits non-zero when nothing matches; without it a shell
# running with errexit would stop here before the explicit message below.
result=$(grep "contaminant" "test_data/input_1_fastqc/fastqc_data.txt" || true)
expected_result=$(printf "CACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGANNNNNN\t1\t100.0\tcontaminant_sequence1 (100%% over 44bp)\n")

if [ -z "$result" ]; then
  echo "Contaminants not found in fastqc_data.txt"
  exit 1
fi

if [ "$result" != "$expected_result" ]; then
  echo "Contaminants do not match expected output"
  echo "Result: $result"
  echo "Expected: $expected_result"
  exit 1
fi

# Remove everything this test produced so later tests start clean
rm -r "test_data/input_1_fastqc"
rm "test_data/input_1_fastqc.html"
rm "test_data/input_1_fastqc.zip"

echo "- test succeeded -"

# Test 4: Run fastqc with sam file
# Runs the component on the SAM fixture with an explicit --format and checks
# that both reports were written and are non-empty.
echo "-> Run Test: sam file"
"$meta_executable" \
  --extract \
  --input "test_data/example.sam" \
  --format "sam"

# The HTML report must exist
if [ ! -f "test_data/example_fastqc.html" ]; then
  echo "Output HTML file not found."
  exit 1
fi

# The ZIP archive must exist
if [ ! -f "test_data/example_fastqc.zip" ]; then
  echo "Output ZIP file not found."
  exit 1
fi

# Neither output may be empty
if [ ! -s "test_data/example_fastqc.html" ]; then
  echo "Output HTML file is empty."
  exit 1
fi

if [ ! -s "test_data/example_fastqc.zip" ]; then
  echo "Output ZIP file is empty."
  exit 1
fi
# Check if the summary.txt was extracted
[ ! -f "../test_data/test2/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1
[ ! -f "../test_data/test2/input_2_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1
[ ! -f "test_data/example_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1

# Check if the summary.txt is correct
diff -a "../test_data/expected_summary.txt" "../test_data/test2/input_1_fastqc/summary.txt" \
diff -a "test_data/expected_summary_sam.txt" "test_data/example_fastqc/summary.txt" \
|| (echo "Output summary file does not match expected output" && exit 1)
diff -a "../test_data/expected_summary2.txt" "../test_data/test2/input_2_fastqc/summary.txt" \

rm -r "test_data/example_fastqc"
rm "test_data/example_fastqc.html"
rm "test_data/example_fastqc.zip"

echo "- test succeeded -"

# Test 5: Run fastqc with multiple options
# Runs the component on input_1.fq with several flags combined and checks that
# the reports exist, are non-empty, and that summary.txt still matches the
# expected fixture.
echo "-> Run Test: multiple options"
"$meta_executable" \
  --extract \
  --input "test_data/input_1.fq" \
  --contaminants "test_data/contaminants.txt" \
  --format "fastq" \
  --casava \
  --nofilter \
  --nogroup \
  --min_length 10 \
  --threads 4 \
  --kmers 5

# Check if the html file was generated
if [ ! -f "test_data/input_1_fastqc.html" ]; then
  echo "Output HTML file not found."
  exit 1
fi

# Check if the zip file was generated
if [ ! -f "test_data/input_1_fastqc.zip" ]; then
  echo "Output ZIP file not found."
  exit 1
fi

# Check if the files are empty
if [ ! -s "test_data/input_1_fastqc.html" ]; then
  echo "Output HTML file is empty."
  exit 1
fi

if [ ! -s "test_data/input_1_fastqc.zip" ]; then
  echo "Output ZIP file is empty."
  exit 1
fi

# Check if the summary.txt was extracted
if [ ! -f "test_data/input_1_fastqc/summary.txt" ]; then
  echo "Extracted files not found."
  exit 1
fi

# Check if the summary.txt is correct.
# A brace group is used so that `exit 1` terminates the test script itself;
# inside parentheses it would only leave a subshell.
diff -a "test_data/expected_summary.txt" "test_data/input_1_fastqc/summary.txt" \
  || { echo "Output summary file does not match expected output"; exit 1; }

# Remove everything this test produced so later tests start clean
rm -r "test_data/input_1_fastqc"
rm "test_data/input_1_fastqc.html"
rm "test_data/input_1_fastqc.zip"

echo "- test succeeded -"
cd ..


# Add more tests here

echo "All tests succeeded!"
exit 0
2 changes: 2 additions & 0 deletions src/fastqc/test_data/contaminants.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
contaminant_sequence1 CACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGA
contaminant_sequence2 GATCTTGG
7 changes: 7 additions & 0 deletions src/fastqc/test_data/example.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@HD VN:1.0 SO:unsorted
@SQ SN:chr1 LN:248956422
@SQ SN:chr2 LN:242193529
@PG ID:bowtie2 PN:bowtie2 VN:2.3.4.1 CL:"/usr/bin/bowtie2-align-s --wrapper basic-0 -x genome -U reads.fq -S output.sam"
read1 0 chr1 100 255 50M * 0 0 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:-10 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:50 YT:Z:UU
read2 0 chr2 150 255 50M * 0 0 TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:-8 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:50 YT:Z:UU
read3 16 chr1 200 255 50M * 0 0 GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:-12 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:50 YT:Z:UU

0 comments on commit ffcb97e

Please sign in to comment.