From ffcb97ed3ba07fa55910bc698ac235b68066c1c0 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Tue, 23 Jul 2024 16:31:02 -0300 Subject: [PATCH] Added more tests --- src/fastqc/config.vsh.yaml | 1 - src/fastqc/script.sh | 73 ++++----- src/fastqc/test.sh | 211 ++++++++++++++++++++++---- src/fastqc/test_data/contaminants.txt | 2 + src/fastqc/test_data/example.sam | 7 + 5 files changed, 228 insertions(+), 66 deletions(-) create mode 100644 src/fastqc/test_data/contaminants.txt create mode 100644 src/fastqc/test_data/example.sam diff --git a/src/fastqc/config.vsh.yaml b/src/fastqc/config.vsh.yaml index 9cdb1176..315263d1 100644 --- a/src/fastqc/config.vsh.yaml +++ b/src/fastqc/config.vsh.yaml @@ -35,7 +35,6 @@ argument_groups: - name: --zip type: file direction: output - multiple: true multiple_sep: "," description: | Create the zip file(s) containing: html report, data, images, icons etc. diff --git a/src/fastqc/script.sh b/src/fastqc/script.sh index 0a77c446..2749d03d 100644 --- a/src/fastqc/script.sh +++ b/src/fastqc/script.sh @@ -36,41 +36,44 @@ fastqc \ ${par_dir:+--dir "$par_dir"} \ ${par_input:+ ${input[*]}} -# input_dir=$(dirname ${input[1]}) -# # Both outputs args passed -# if [[ -n "$par_html" ]] && [[ -n "$par_zip" ]]; then -# IFS=',' read -r -a html_files <<< "$par_html" -# IFS=',' read -r -a zip_files <<< "$par_zip" -# for i in "${!input[@]}"; do -# sample_name=$(basename ${input[$i]} .fq) -# input_zip="$input_dir/${sample_name}_fastqc.zip" -# input_html="$input_dir/${sample_name}_fastqc.html" -# zip_file=${zip_files[$i]} -# html_file=${html_files[$i]} -# mv "$input_zip" "$zip_file" -# mv "$input_html" "$html_file" -# done -# # Only html output arg passed -# elif [[ -n "$par_html" ]]; then -# IFS=',' read -r -a html_files <<< "$par_html" -# for i in "${!input[@]}"; do -# sample_name=$(basename ${input[$i]} .fq) -# input_html="$input_dir/${sample_name}_fastqc.html" -# html_file=${html_files[$i]} -# mv "$input_html" "$html_file" -# done -# rm "$input_dir"/*.zip -# # Only zip output arg passed -# elif [[ -n "$par_zip" ]]; then -# IFS=',' read -r -a zip_files <<< "$par_zip" -# for i in "${!input[@]}"; do -# sample_name=$(basename ${input[$i]} .fq) -# input_zip="$input_dir/${sample_name}_fastqc.zip" -# zip_file=${zip_files[$i]} -# mv "$input_zip" "$zip_file" -# done -# rm "$input_dir"/*.html -# fi +input_dir=$(dirname ${input[0]}) +# echo "input dir: $input_dir" +# echo "input: ${input[*]}" + +# Both outputs args passed +if [[ -n "$par_html" ]] && [[ -n "$par_zip" ]]; then + IFS=',' read -r -a html_files <<< "$par_html" + IFS=',' read -r -a zip_files <<< "$par_zip" + for i in "${!input[@]}"; do + sample_name=$(basename ${input[$i]} .fq) + input_zip="$input_dir/${sample_name}_fastqc.zip" + input_html="$input_dir/${sample_name}_fastqc.html" + zip_file=${zip_files[$i]} + html_file=${html_files[$i]} + mv "$input_zip" "$zip_file" + mv "$input_html" "$html_file" + done +# Only html output arg passed +elif [[ -n "$par_html" ]]; then + IFS=',' read -r -a html_files <<< "$par_html" + for i in "${!input[@]}"; do + sample_name=$(basename ${input[$i]} .fq) + input_html="$input_dir/${sample_name}_fastqc.html" + html_file=${html_files[$i]} + mv "$input_html" "$html_file" + done + rm "$input_dir"/*.zip +# Only zip output arg passed +elif [[ -n "$par_zip" ]]; then + IFS=',' read -r -a zip_files <<< "$par_zip" + for i in "${!input[@]}"; do + sample_name=$(basename ${input[$i]} .fq) + input_zip="$input_dir/${sample_name}_fastqc.zip" + zip_file=${zip_files[$i]} + mv "$input_zip" "$zip_file" + done + rm "$input_dir"/*.html +fi # Questions: diff --git a/src/fastqc/test.sh b/src/fastqc/test.sh index f4c6420f..bbcc9b04 100644 --- a/src/fastqc/test.sh +++ b/src/fastqc/test.sh @@ -27,6 +27,19 @@ CACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGANNNNNNNNNNANNNCGAGGCCCTGGGGTAGAGGGNN @?@DDDDDDHHH?GH:?FCBGGB@C?DBEGIIIIAEF;FCGGI######################################################### EOL +# Create and populate contaminants.txt +printf "contaminant_sequence1\tCACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGA\n" > "test_data/contaminants.txt" +printf "contaminant_sequence2\tGATCTTGG\n" >> "test_data/contaminants.txt" + +# Create and populate SAM file +printf "@HD\tVN:1.0\tSO:unsorted\n" > "test_data/example.sam" +printf "@SQ\tSN:chr1\tLN:248956422\n" >> "test_data/example.sam" +printf "@SQ\tSN:chr2\tLN:242193529\n" >> "test_data/example.sam" +printf "@PG\tID:bowtie2\tPN:bowtie2\tVN:2.3.4.1\tCL:\"/usr/bin/bowtie2-align-s --wrapper basic-0 -x genome -U reads.fq -S output.sam\"\n" >> "test_data/example.sam" +printf "read1\t0\tchr1\t100\t255\t50M\t*\t0\t0\tACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-10\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU\n" >> "test_data/example.sam" +printf "read2\t0\tchr2\t150\t255\t50M\t*\t0\t0\tTGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-8\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU\n" >> "test_data/example.sam" +printf "read3\t16\tchr1\t200\t255\t50M\t*\t0\t0\tGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU" >> "test_data/example.sam" + cat > "test_data/expected_summary.txt" < "test_data/expected_summary_sam.txt" < Run Test without options" +echo "-> Run Test: one input" "$meta_executable" \ - --input "../test_data/input_1.fq" + --extract \ + --input "test_data/input_1.fq" # Check if the html file was generated -[ ! -f "../test_data/input_1_fastqc.html" ] \ +[ ! -f "test_data/input_1_fastqc.html" ] \ && echo "Output HTML file not found." && exit 1 # Check if the zip file was generated -[ ! -f "../test_data/input_1_fastqc.zip" ] \ +[ ! -f "test_data/input_1_fastqc.zip" ] \ && echo "Output ZIP file not found." && exit 1 # Check if the files are empty -[ ! -s "../test_data/input_1_fastqc.html" ] \ +[ ! -s "test_data/input_1_fastqc.html" ] \ && echo "Output HTML file is empty." && exit 1 -[ ! -s "../test_data/input_1_fastqc.zip" ] \ +[ ! -s "test_data/input_1_fastqc.zip" ] \ && echo "Output ZIP file is empty." && exit 1 -# Unzip the zip file -unzip "../test_data/input_1_fastqc.zip" -d "../test_data/test1/" - # Check if the summary.txt was extracted -[ ! -f "../test_data/test1/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 +[ ! -f "test_data/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 # Check if the summary.txt is correct -diff -a "../test_data/expected_summary.txt" "../test_data/test1/input_1_fastqc/summary.txt" \ +diff -a "test_data/expected_summary.txt" "test_data/input_1_fastqc/summary.txt" \ || (echo "Output summary file does not match expected output" && exit 1) +rm -r "test_data/input_1_fastqc" +rm "test_data/input_1_fastqc.html" +rm "test_data/input_1_fastqc.zip" + echo "- test succeeded -" -cd .. # Test 2: Run fastqc with multiple inputs -mkdir test2 -cd test2 -echo "-> Run Test with multiple inputs" +echo "-> Run Test: two inputs" "$meta_executable" \ - --input "../test_data/input_1.fq,../test_data/input_2.fq" + --extract \ + --input "test_data/input_1.fq,test_data/input_2.fq" # Check if the html files was generated -[ ! -f "../test_data/input_1_fastqc.html" ] && [ ! -f "../test_data/input_2_fastqc.html" ] \ +[ ! -f "test_data/input_1_fastqc.html" ] && [ ! -f "test_data/input_2_fastqc.html" ] \ && echo "Output HTML files not found." && exit 1 # Check if the zip files was generated -[ ! -f "../test_data/input_1_fastqc.zip" ] && [ ! -f "../test_data/input_2_fastqc.zip" ] \ +[ ! -f "test_data/input_1_fastqc.zip" ] && [ ! -f "test_data/input_2_fastqc.zip" ] \ && echo "Output ZIP files not found." && exit 1 # Check if the files are empty -[ ! -s "../test_data/input_1_fastqc.html" ] && [ ! -s "../test_data/input_2_fastqc.html" ] \ +[ ! -s "test_data/input_1_fastqc.html" ] && [ ! -s "test_data/input_2_fastqc.html" ] \ && echo "Output HTML files are empty." && exit 1 -[ ! -s "../test_data/input_1_fastqc.zip" ] && [ ! -s "../test_data/input_2_fastqc.zip" ] \ +[ ! -s "test_data/input_1_fastqc.zip" ] && [ ! -s "test_data/input_2_fastqc.zip" ] \ && echo "Output ZIP files are empty." && exit 1 -# Unzip the zip files -unzip "../test_data/input_1_fastqc.zip" -d "../test_data/test2/" -unzip "../test_data/input_2_fastqc.zip" -d "../test_data/test2/" +# Check if the summary.txt was extracted +[ ! -f "test_data/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 +[ ! -f "test_data/input_2_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 + +# Check if the summary.txt is correct +diff -a "test_data/expected_summary.txt" "test_data/input_1_fastqc/summary.txt" \ + || (echo "Output summary file does not match expected output" && exit 1) +diff -a "test_data/expected_summary2.txt" "test_data/input_2_fastqc/summary.txt" \ + || (echo "Output summary file does not match expected output" && exit 1) + +rm -r "test_data/input_1_fastqc" +rm -r "test_data/input_2_fastqc" +rm "test_data/input_1_fastqc.html" +rm "test_data/input_2_fastqc.html" +rm "test_data/input_1_fastqc.zip" +rm "test_data/input_2_fastqc.zip" + +echo "- test succeeded -" + +# Test 3: Run fastqc with contaminants +echo "-> Run Test: contaminants" +"$meta_executable" \ + --extract \ + --input "test_data/input_1.fq" \ + --contaminants "test_data/contaminants.txt" + +# Check if the html file was generated +[ ! -f "test_data/input_1_fastqc.html" ] \ + && echo "Output HTML file not found." && exit 1 + +# Check if the zip file was generated +[ ! -f "test_data/input_1_fastqc.zip" ] \ + && echo "Output ZIP file not found." && exit 1 + +# Check if the files are empty +[ ! -s "test_data/input_1_fastqc.html" ] \ + && echo "Output HTML file is empty." && exit 1 + +[ ! -s "test_data/input_1_fastqc.zip" ] \ + && echo "Output ZIP file is empty." && exit 1 + +# Check if the summary.txt was extracted +[ ! -f "test_data/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 + +# Checking for contaminants in fastqc_data.txt +echo "Checking for contaminants in fastqc_data.txt" +result=$(cat test_data/input_1_fastqc/fastqc_data.txt | grep "contaminant" ) +expecte_result=$(printf "CACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGANNNNNN\t1\t100.0\tcontaminant_sequence1 (100%% over 44bp)\n") + +[ -z "$result" ] && echo "Contaminants not found in fastqc_data.txt" && exit 1 + +[ "$result" != "$expecte_result" ] \ + && echo "Contaminants do not match expected output" \ + && echo "Result: $result" \ + && echo "Expected: $expecte_result" \ + && exit 1 + +rm -r "test_data/input_1_fastqc" +rm "test_data/input_1_fastqc.html" +rm "test_data/input_1_fastqc.zip" + +echo "- test succeeded -" + +# Test 4: Run fastqc with sam file +echo "-> Run Test: sam file" +"$meta_executable" \ + --extract \ + --input "test_data/example.sam" \ + --format "sam" + +# Check if the html file was generated +[ ! -f "test_data/example_fastqc.html" ] \ + && echo "Output HTML file not found." && exit 1 + +# Check if the zip file was generated +[ ! -f "test_data/example_fastqc.zip" ] \ + && echo "Output ZIP file not found." && exit 1 + +# Check if the files are empty +[ ! -s "test_data/example_fastqc.html" ] \ + && echo "Output HTML file is empty." && exit 1 + +[ ! -s "test_data/example_fastqc.zip" ] \ + && echo "Output ZIP file is empty." && exit 1 # Check if the summary.txt was extracted -[ ! -f "../test_data/test2/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 -[ ! -f "../test_data/test2/input_2_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 +[ ! -f "test_data/example_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 # Check if the summary.txt is correct -diff -a "../test_data/expected_summary.txt" "../test_data/test2/input_1_fastqc/summary.txt" \ +diff -a "test_data/expected_summary_sam.txt" "test_data/example_fastqc/summary.txt" \ || (echo "Output summary file does not match expected output" && exit 1) -diff -a "../test_data/expected_summary2.txt" "../test_data/test2/input_2_fastqc/summary.txt" \ + +rm -r "test_data/example_fastqc" +rm "test_data/example_fastqc.html" +rm "test_data/example_fastqc.zip" + +echo "- test succeeded -" + +# Test 5: Run fastqc with multiple options +echo "-> Run Test: multiple options" +"$meta_executable" \ + --extract \ + --input "test_data/input_1.fq" \ + --contaminants "test_data/contaminants.txt" \ + --format "fastq" \ + --casava \ + --nofilter \ + --nogroup \ + --min_length 10 \ + --threads 4 \ + --kmers 5 + + +# Check if the html file was generated +[ ! -f "test_data/input_1_fastqc.html" ] \ + && echo "Output HTML file not found." && exit 1 + +# Check if the zip file was generated +[ ! -f "test_data/input_1_fastqc.zip" ] \ + && echo "Output ZIP file not found." && exit 1 + +# Check if the files are empty +[ ! -s "test_data/input_1_fastqc.html" ] \ + && echo "Output HTML file is empty." && exit 1 + +[ ! -s "test_data/input_1_fastqc.zip" ] \ + && echo "Output ZIP file is empty." && exit 1 + +# Check if the summary.txt was extracted +[ ! -f "test_data/input_1_fastqc/summary.txt" ] && echo "Extracted files not found." && exit 1 + +# Check if the summary.txt is correct +diff -a "test_data/expected_summary.txt" "test_data/input_1_fastqc/summary.txt" \ || (echo "Output summary file does not match expected output" && exit 1) +rm -r "test_data/input_1_fastqc" +rm "test_data/input_1_fastqc.html" +rm "test_data/input_1_fastqc.zip" + echo "- test succeeded -" -cd .. + + +# Add more tests here echo "All tests succeeded!" exit 0 \ No newline at end of file diff --git a/src/fastqc/test_data/contaminants.txt b/src/fastqc/test_data/contaminants.txt new file mode 100644 index 00000000..90dbe496 --- /dev/null +++ b/src/fastqc/test_data/contaminants.txt @@ -0,0 +1,2 @@ +contaminant_sequence1 CACTTGTAAGGGCAGGCCCCCTTCACCCTCCCGCTCCTGGGGGA +contaminant_sequence2 GATCTTGG diff --git a/src/fastqc/test_data/example.sam b/src/fastqc/test_data/example.sam new file mode 100644 index 00000000..76b39a8c --- /dev/null +++ b/src/fastqc/test_data/example.sam @@ -0,0 +1,7 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:chr1 LN:248956422 +@SQ SN:chr2 LN:242193529 +@PG ID:bowtie2 PN:bowtie2 VN:2.3.4.1 CL:"/usr/bin/bowtie2-align-s --wrapper basic-0 -x genome -U reads.fq -S output.sam" +read1 0 chr1 100 255 50M * 0 0 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:-10 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:50 YT:Z:UU +read2 0 chr2 150 255 50M * 0 0 TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:-8 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:50 YT:Z:UU +read3 16 chr1 200 255 50M * 0 0 GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:-12 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:50 YT:Z:UU