From 24a92976268770607a10f2d8d592415f2e69079f Mon Sep 17 00:00:00 2001 From: Nirmayi Date: Mon, 19 Aug 2024 10:04:47 +0200 Subject: [PATCH] fix script and update test --- src/trimgalore/config.vsh.yaml | 8 +- src/trimgalore/script.sh | 62 ++++++++++++---- src/trimgalore/test.sh | 129 ++++++++++++++++----------------- 3 files changed, 114 insertions(+), 85 deletions(-) diff --git a/src/trimgalore/config.vsh.yaml b/src/trimgalore/config.vsh.yaml index 9074564e..c55bf19d 100644 --- a/src/trimgalore/config.vsh.yaml +++ b/src/trimgalore/config.vsh.yaml @@ -1,6 +1,6 @@ name: trimgalore description: | - + A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files. keywords: ["trimming", "adapters"] links: homepage: https://github.com/FelixKrueger/TrimGalore @@ -36,12 +36,10 @@ argument_groups: type: boolean description: Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming. required: false - default: true - name: --phred64 type: boolean description: Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming. required: false - default: false - name: --fastqc type: boolean description: Run FastQC in the default mode on the FastQ file once trimming is complete. @@ -309,8 +307,8 @@ engines: setup: - type: docker run: | - echo "TrimGalore: `trim_galore --version | grep -oP 'version \K\d+\.\d+\.\d+'`" > /var/software_versions.txt + echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt runners: - type: executable - - type: nextflow \ No newline at end of file + - type: nextflow diff --git a/src/trimgalore/script.sh b/src/trimgalore/script.sh index db1f1f13..46413c1f 100755 --- a/src/trimgalore/script.sh +++ b/src/trimgalore/script.sh @@ -6,7 +6,25 @@ set -eo pipefail IFS=";" read -ra input <<< $par_input -unset_if_false=( par_phred33 par_phred64 par_fastqc par_illumina par_stranded_illumina par_nextera par_small_rna par_gzip par_dont_gzip par_no_report_file par_suppress_warn par_clock par_polyA par_rrbs par_non_directional par_keep par_paired par_retain_unpaired ) +unset_if_false=( + par_phred33 + par_phred64 + par_fastqc + par_illumina + par_stranded_illumina + par_nextera + par_small_rna + par_gzip + par_dont_gzip + par_no_report_file + par_suppress_warn + par_clock + par_polyA + par_rrbs + par_non_directional + par_keep par_paired + par_retain_unpaired +) for par in ${unset_if_false[@]}; do test_val="${!par}" @@ -59,21 +77,35 @@ trim_galore \ ${input[*]} if [ $par_paired == "true" ]; then + input_r1=$(basename -- "${input[0]}") input_r2=$(basename -- "${input[1]}") - [[ ! -z "$par_trimmed_r1" ]] && mv "$par_output_dir/*val_1*.f*q.*" "$par_trimmed_r1" - [[ ! -z "$par_trimmed_r2" ]] && mv "$par_output_dir/*val_2*.f*q.*" "$par_trimmed_r2" - [[ ! -z "$par_trimming_report_r1" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r1" - [[ ! -z "$par_trimming_report_r2" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r2" - [[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv "$par_output_dir/*val_1*.html" "$par_trimmed_fastqc_html_1" - [[ ! -z "$par_trimmed_fastqc_html_2" ]] && mv "$par_output_dir/*val_2*.html" "$par_trimmed_fastqc_html_2" - [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv "$par_output_dir/*val_1*.zip" "$par_trimmed_fastqc_zip_1" - [[ ! -z "$par_trimmed_fastqc_zip_2" ]] && mv "$par_output_dir/*val_2*.zip" "$par_trimmed_fastqc_zip_2" - [[ ! -z "$par_unpaired_r1" ]] && mv "$par_output_dir/*.unpaired_1.f*q.*" "$par_unpaired_r1" - [[ ! -z "$par_unpaired_r2" ]] && mv "$par_output_dir/*.unpaired_2.f*q.*" "$par_unpaired_r2" + [[ ! -z "$par_trimmed_r1" ]] && mv $par_output_dir/*val_1.f*q* $par_trimmed_r1 + [[ ! -z "$par_trimmed_r2" ]] && mv $par_output_dir/*val_2.f*q* $par_trimmed_r2 + [[ ! -z "$par_trimming_report_r1" ]] && mv $par_output_dir/${input_r1}_trimming_report.txt $par_trimming_report_r1 + [[ ! -z "$par_trimming_report_r2" ]] && mv $par_output_dir/${input_r2}_trimming_report.txt $par_trimming_report_r2 + + if [ "$par_fastqc" == "true" ]; then + [[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv $par_output_dir/*val_1_fastqc.html $par_trimmed_fastqc_html_1 + [[ ! -z "$par_trimmed_fastqc_html_2" ]] && mv $par_output_dir/*val_2_fastqc.html $par_trimmed_fastqc_html_2 + [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv $par_output_dir/*val_1_fastqc.zip $par_trimmed_fastqc_zip_1 + [[ ! -z "$par_trimmed_fastqc_zip_2" ]] && mv $par_output_dir/*val_2_fastqc.zip $par_trimmed_fastqc_zip_2 + fi + + if [ "$par_retain_unpaired" == "true" ]; then + [[ ! -z "$par_unpaired_r1" ]] && mv $par_output_dir/*.unpaired_1.f*q* $par_unpaired_r1 + [[ ! -z "$par_unpaired_r2" ]] && mv $par_output_dir/*.unpaired_2.f*q* $par_unpaired_r2 + fi + else + input_r1=$(basename -- "${input[0]}") - [[ ! -z "$par_trimmed_r1" ]] && mv "$par_output_dir/*_trimmed.fq*" "$trimmed_r1" - [[ ! -z "$par_trimming_report_r1" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r1" - [[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv "$par_output_dir/*_trimmed_fastqc.html" "$par_trimmed_fastqc_html_1" - [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv "$par_output_dir/*_trimmed_fastqc.zip" "$par_trimmed_fastqc_zip_1" + [[ ! -z "$par_trimmed_r1" ]] && mv $par_output_dir/*_trimmed.fq* $par_trimmed_r1 + [[ ! -z "$par_trimming_report_r1" ]] && mv $par_output_dir/${input_r1}_trimming_report.txt $par_trimming_report_r1 + + if [ "$par_fastqc" == "true" ]; then + [[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv $par_output_dir/*_trimmed_fastqc.html $par_trimmed_fastqc_html_1 + [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv $par_output_dir/*_trimmed_fastqc.zip $par_trimmed_fastqc_zip_1 + fi + +fi \ No newline at end of file diff --git a/src/trimgalore/test.sh b/src/trimgalore/test.sh index fc906d12..d1416607 100644 --- a/src/trimgalore/test.sh +++ b/src/trimgalore/test.sh @@ -19,9 +19,6 @@ assert_file_not_empty() { assert_file_contains() { grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } } -assert_file_contains_line() { - grep -q -x "$2" "$1" || { echo "File '$1' does not contain line '$2'" && exit 1; } -} assert_file_not_contains() { grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } } @@ -31,96 +28,98 @@ assert_file_not_contains() { echo ">>> Prepare test data" cat > example_R1.fastq <<'EOF' -@read1 -ACGTACGTACGTAAAAA +@SRR6357071.22842410 22842410/1 kraken:taxid|4932 +CAAGTTTTCATCTTCAACAGCTGATTGACTTCTTTGTGGTATGCCTCGATATATTTTTCTTTTTCTTTAATATCTTTATTATAGGTGATTGCCTCATCGTA + -IIIIIIIIIIIIIIIII -@read2 -ACGTACGTACGTCCCCC +BBBBBFFFFFFFFFFFFFFF/BFFFFFFFFFFFFFFFFBFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFBF< +@SRR6357071.52260105 52260105/1 kraken:taxid|4932 +TAGACTTACCAGTACCCTTTTCGACGGCGGAAACATTCAAAATACCGTTAGAGTCGACATCGAAAGTGACTTCAATTTGTGGGACACCTCTTGGAGCTGGT + -IIIIIIIIIIIIIIIII +BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/FFFFFFFFFFFFFFFF EOF cat > example_R2.fastq <<'EOF' -@read1 -ACGTACGTACGTGGGGG +@SRR6357071.22842410 22842410/2 kraken:taxid|4932 +CCGAGATCGAAGAAACGAATTCACCTGATTGCAGCTGTAAAAGCAGTAAAATCAATCAAACCAATACGGACAACCTTACGATACGATGAGGCAATCACCTA + -IIIIIIIIIIIIIIIII -@read2 -ACGTACGTACGTTTTTT +BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SRR6357071.52260105 52260105/2 kraken:taxid|4932 +GTTGATTCCAAGAAACTCTACCATTCCAACTAAGAAATCCGAAGTTTTCTCTACTTATGCTGACAACCAACCAGGTGTCTTGATTCAAGTCTTTGAAGGTG + -IIIIIIIIIIIIIIIII +BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF EOF ################################################################# -echo ">>> Testing for paired-end reads" +echo ">>> Testing for single-end reads" "$meta_executable" \ - --paired true \ - --input "example_R1.fastq;example_R2.fastq" \ - --adapter "ACG" \ - --trimmed_fastqc_html_1 example_R1.trimmed.html \ - --trimmed_fastqc_html_2 example_R2.trimmed.html \ - --trimmed_fastqc_zip_1 example_R1.trimmed.zip \ - --trimmed_fastqc_zip_2 example_R2.trimmed.zip \ - --trimmed_r1 example_R1.trimmed.fastq \ - --trimmed_r2 example_R2.trimmed.fastq \ - --trimming_report_r1 example_R1.trimming_report.txt \ - --trimming_report_r2 example_R2.trimming_report.txt + --paired false \ + --input "example_R1.fastq" \ + --trimmed_fastqc_html_1 output_se_test/example.trimmed.html \ + --trimmed_fastqc_zip_1 output_se_test/example.trimmed.zip \ + --trimmed_r1 output_se_test/example.trimmed.fastq \ + --trimming_report_r1 output_se_test/example.trimming_report.txt \ + --fastqc true \ + --output_dir output_se_test echo ">> Checking output" -assert_file_exists "example_R1.trimmed.html" -assert_file_exists "example_R2.trimmed.html" -assert_file_exists "example_R1.trimmed.zip" -assert_file_exists "example_R2.trimmed.zip" -assert_file_exists "example_R1.trimmed.fastq" -assert_file_exists "example_R2.trimmed.fastq" -assert_file_exists "example_R1.trimming_report.txt" -assert_file_exists "example_R2.trimming_report.txt" +assert_file_exists "output_se_test/example.trimmed.html" +assert_file_exists "output_se_test/example.trimmed.zip" +assert_file_exists "output_se_test/example.trimmed.fastq" +assert_file_exists "output_se_test/example.trimming_report.txt" echo ">> Check if output is empty" -assert_file_not_empty "example_R1.trimmed.html" -assert_file_not_empty "example_R2.trimmed.html" -assert_file_not_empty "example_R1.trimmed.zip" -assert_file_not_empty "example_R2.trimmed.zip" -assert_file_not_empty "example_R1.trimmed.fastq" -assert_file_not_empty "example_R2.trimmed.fastq" -assert_file_not_empty "example_R1.trimming_report.txt" -assert_file_not_empty "example_R2.trimming_report.txt" +assert_file_not_empty "output_se_test/example.trimmed.html" +assert_file_not_empty "output_se_test/example.trimmed.zip" +assert_file_not_empty "output_se_test/example.trimmed.fastq" +assert_file_not_empty "output_se_test/example.trimming_report.txt" echo ">> Check contents" -assert_file_contains_line "example_R1.trimmed.fastq" "TACGTACGTAAAAA" -assert_file_contains_line "example_R2.trimmed.fastq" "TACGTACGTGGGGG" -assert_file_contains "example_R1.trimming_report.txt" "sequences processed in total" -assert_file_contains "example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter than the length cutoff" +assert_file_contains "output_se_test/example.trimmed.fastq" "@SRR6357071.22842410 22842410/1" +assert_file_contains "output_se_test/example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff" ################################################################# -echo ">>> Testing for single-end reads" +echo ">>> Testing for paired-end reads" "$meta_executable" \ - --paired false \ - --input "example_R1.fastq" \ - --adapter "ACG" \ - --trimmed_fastqc_html_1 example.trimmed.html \ - --trimmed_fastqc_zip_1 example.trimmed.zip \ - --trimmed_r1 example.trimmed.fastq \ - --trimming_report_r1 example.trimming_report.txt \ + --paired true \ + --input "example_R1.fastq;example_R2.fastq" \ + --trimmed_fastqc_html_1 output_pe_test/example_R1.trimmed.html \ + --trimmed_fastqc_html_2 output_pe_test/example_R2.trimmed.html \ + --trimmed_fastqc_zip_1 output_pe_test/example_R1.trimmed.zip \ + --trimmed_fastqc_zip_2 output_pe_test/example_R2.trimmed.zip \ + --trimmed_r1 output_pe_test/example_R1.trimmed.fastq \ + --trimmed_r2 output_pe_test/example_R2.trimmed.fastq \ + --trimming_report_r1 output_pe_test/example_R1.trimming_report.txt \ + --trimming_report_r2 output_pe_test/example_R2.trimming_report.txt \ + --fastqc true \ + --output_dir output_pe_test echo ">> Checking output" -assert_file_exists "example.trimmed.html" -assert_file_exists "example.trimmed.zip" -assert_file_exists "example.trimmed.fastq" -assert_file_exists "example.trimming_report.txt" +assert_file_exists "output_pe_test/example_R1.trimmed.html" +assert_file_exists "output_pe_test/example_R2.trimmed.html" +assert_file_exists "output_pe_test/example_R1.trimmed.zip" +assert_file_exists "output_pe_test/example_R2.trimmed.zip" +assert_file_exists "output_pe_test/example_R1.trimmed.fastq" +assert_file_exists "output_pe_test/example_R2.trimmed.fastq" +assert_file_exists "output_pe_test/example_R1.trimming_report.txt" +assert_file_exists "output_pe_test/example_R2.trimming_report.txt" echo ">> Check if output is empty" -assert_file_not_empty "example.trimmed.html" -assert_file_not_empty "example.trimmed.zip" -assert_file_not_empty "example.trimmed.fastq" -assert_file_not_empty "example.trimming_report.txt" +assert_file_not_empty "output_pe_test/example_R1.trimmed.html" +assert_file_not_empty "output_pe_test/example_R2.trimmed.html" +assert_file_not_empty "output_pe_test/example_R1.trimmed.zip" +assert_file_not_empty "output_pe_test/example_R2.trimmed.zip" +assert_file_not_empty "output_pe_test/example_R1.trimmed.fastq" +assert_file_not_empty "output_pe_test/example_R2.trimmed.fastq" +assert_file_not_empty "output_pe_test/example_R1.trimming_report.txt" +assert_file_not_empty "output_pe_test/example_R2.trimming_report.txt" echo ">> Check contents" -assert_file_contains_line "example.trimmed.fastq" "TACGTACGTAAAAA" -assert_file_contains "example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff" +assert_file_contains "output_pe_test/example_R1.trimmed.fastq" "@SRR6357071.22842410 22842410/1" +assert_file_contains "output_pe_test/example_R2.trimmed.fastq" "@SRR6357071.22842410 22842410/2" +assert_file_contains "output_pe_test/example_R1.trimming_report.txt" "sequences processed in total" +assert_file_contains "output_pe_test/example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter than the length cutoff" #################################################################