Skip to content

Commit

Permalink
fix script and update test
Browse files Browse the repository at this point in the history
  • Loading branch information
sainirmayi committed Aug 19, 2024
1 parent b725d4a commit 24a9297
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 85 deletions.
8 changes: 3 additions & 5 deletions src/trimgalore/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: trimgalore
description: |
A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files.
keywords: ["trimming", "adapters"]
links:
homepage: https://github.com/FelixKrueger/TrimGalore
Expand Down Expand Up @@ -36,12 +36,10 @@ argument_groups:
type: boolean
description: Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming.
required: false
default: true
- name: --phred64
type: boolean
description: Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming.
required: false
default: false
- name: --fastqc
type: boolean
description: Run FastQC in the default mode on the FastQ file once trimming is complete.
Expand Down Expand Up @@ -309,8 +307,8 @@ engines:
setup:
- type: docker
run: |
echo "TrimGalore: `trim_galore --version | grep -oP 'version \K\d+\.\d+\.\d+'`" > /var/software_versions.txt
echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt
runners:
- type: executable
- type: nextflow
- type: nextflow
62 changes: 47 additions & 15 deletions src/trimgalore/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,25 @@ set -eo pipefail

IFS=";" read -ra input <<< $par_input

unset_if_false=( par_phred33 par_phred64 par_fastqc par_illumina par_stranded_illumina par_nextera par_small_rna par_gzip par_dont_gzip par_no_report_file par_suppress_warn par_clock par_polyA par_rrbs par_non_directional par_keep par_paired par_retain_unpaired )
unset_if_false=(
par_phred33
par_phred64
par_fastqc
par_illumina
par_stranded_illumina
par_nextera
par_small_rna
par_gzip
par_dont_gzip
par_no_report_file
par_suppress_warn
par_clock
par_polyA
par_rrbs
par_non_directional
par_keep par_paired
par_retain_unpaired
)

for par in ${unset_if_false[@]}; do
test_val="${!par}"
Expand Down Expand Up @@ -59,21 +77,35 @@ trim_galore \
${input[*]}

if [ $par_paired == "true" ]; then

input_r1=$(basename -- "${input[0]}")
input_r2=$(basename -- "${input[1]}")
[[ ! -z "$par_trimmed_r1" ]] && mv "$par_output_dir/*val_1*.f*q.*" "$par_trimmed_r1"
[[ ! -z "$par_trimmed_r2" ]] && mv "$par_output_dir/*val_2*.f*q.*" "$par_trimmed_r2"
[[ ! -z "$par_trimming_report_r1" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r1"
[[ ! -z "$par_trimming_report_r2" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r2"
[[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv "$par_output_dir/*val_1*.html" "$par_trimmed_fastqc_html_1"
[[ ! -z "$par_trimmed_fastqc_html_2" ]] && mv "$par_output_dir/*val_2*.html" "$par_trimmed_fastqc_html_2"
[[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv "$par_output_dir/*val_1*.zip" "$par_trimmed_fastqc_zip_1"
[[ ! -z "$par_trimmed_fastqc_zip_2" ]] && mv "$par_output_dir/*val_2*.zip" "$par_trimmed_fastqc_zip_2"
[[ ! -z "$par_unpaired_r1" ]] && mv "$par_output_dir/*.unpaired_1.f*q.*" "$par_unpaired_r1"
[[ ! -z "$par_unpaired_r2" ]] && mv "$par_output_dir/*.unpaired_2.f*q.*" "$par_unpaired_r2"
[[ ! -z "$par_trimmed_r1" ]] && mv $par_output_dir/*val_1.f*q* $par_trimmed_r1
[[ ! -z "$par_trimmed_r2" ]] && mv $par_output_dir/*val_2.f*q* $par_trimmed_r2
[[ ! -z "$par_trimming_report_r1" ]] && mv $par_output_dir/${input_r1}_trimming_report.txt $par_trimming_report_r1
[[ ! -z "$par_trimming_report_r2" ]] && mv $par_output_dir/${input_r2}_trimming_report.txt $par_trimming_report_r2

if [ "$par_fastqc" == "true" ]; then
[[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv $par_output_dir/*val_1_fastqc.html $par_trimmed_fastqc_html_1
[[ ! -z "$par_trimmed_fastqc_html_2" ]] && mv $par_output_dir/*val_2_fastqc.html $par_trimmed_fastqc_html_2
[[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv $par_output_dir/*val_1_fastqc.zip $par_trimmed_fastqc_zip_1
[[ ! -z "$par_trimmed_fastqc_zip_2" ]] && mv $par_output_dir/*val_2_fastqc.zip $par_trimmed_fastqc_zip_2
fi

if [ "$par_retain_unpaired" == "true" ]; then
[[ ! -z "$par_unpaired_r1" ]] && mv $par_output_dir/*.unpaired_1.f*q* $par_unpaired_r1
[[ ! -z "$par_unpaired_r2" ]] && mv $par_output_dir/*.unpaired_2.f*q* $par_unpaired_r2
fi

else

input_r1=$(basename -- "${input[0]}")
[[ ! -z "$par_trimmed_r1" ]] && mv "$par_output_dir/*_trimmed.fq*" "$trimmed_r1"
[[ ! -z "$par_trimming_report_r1" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r1"
[[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv "$par_output_dir/*_trimmed_fastqc.html" "$par_trimmed_fastqc_html_1"
[[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv "$par_output_dir/*_trimmed_fastqc.zip" "$par_trimmed_fastqc_zip_1"
[[ ! -z "$par_trimmed_r1" ]] && mv $par_output_dir/*_trimmed.fq* $par_trimmed_r1
[[ ! -z "$par_trimming_report_r1" ]] && mv $par_output_dir/${input_r1}_trimming_report.txt $par_trimming_report_r1

if [ "$par_fastqc" == "true" ]; then
[[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv $par_output_dir/*_trimmed_fastqc.html $par_trimmed_fastqc_html_1
[[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv $par_output_dir/*_trimmed_fastqc.zip $par_trimmed_fastqc_zip_1
fi

fi
129 changes: 64 additions & 65 deletions src/trimgalore/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ assert_file_not_empty() {
#######################################
# Assert that a file contains at least one match for a grep pattern.
# Arguments:
#   $1 - path of the file to search
#   $2 - pattern, handed to grep as its search pattern
# Outputs:
#   Writes a failure message to stdout when grep does not report a match.
# Returns:
#   0 when grep finds a match; otherwise exits the shell with status 1.
#######################################
assert_file_contains() {
  # '--' ends grep's option parsing, so a pattern that begins with '-'
  # (for example "--paired true") is searched for rather than being
  # interpreted as a grep option.
  if ! grep -q -- "$2" "$1"; then
    echo "File '$1' does not contain '$2'"
    exit 1
  fi
}
#######################################
# Assert that a file contains a line that matches a grep pattern in full
# (grep -x: the pattern must match the whole line).
# Arguments:
#   $1 - path of the file to search
#   $2 - pattern, handed to grep as its search pattern
# Outputs:
#   Writes a failure message to stdout when grep does not report a match.
# Returns:
#   0 when grep finds a matching line; otherwise exits the shell with
#   status 1.
#######################################
assert_file_contains_line() {
  # '--' ends grep's option parsing, so an expected line that begins with
  # '-' is matched as text rather than being parsed as a grep option.
  if ! grep -q -x -- "$2" "$1"; then
    echo "File '$1' does not contain line '$2'"
    exit 1
  fi
}
#######################################
# Assert that a file contains no match for a grep pattern.
# Arguments:
#   $1 - path of the file to search
#   $2 - pattern, handed to grep as its search pattern
# Outputs:
#   Writes a failure message to stdout when grep reports a match.
# Returns:
#   0 when grep does not report a match; otherwise exits the shell with
#   status 1.
#######################################
assert_file_not_contains() {
  # An explicit 'if' is used instead of 'grep ... && { ...; }': with the
  # short-circuit form the function's own status on the passing path is
  # grep's non-zero "no match" status, which makes a successful assertion
  # look like a failure and aborts callers running under 'set -e'.
  # '--' ends grep's option parsing so patterns starting with '-' are
  # searched for as text.
  if grep -q -- "$2" "$1"; then
    echo "File '$1' contains '$2' but shouldn't"
    exit 1
  fi
  return 0
}
Expand All @@ -31,96 +28,98 @@ assert_file_not_contains() {
echo ">>> Prepare test data"

cat > example_R1.fastq <<'EOF'
@read1
ACGTACGTACGTAAAAA
@SRR6357071.22842410 22842410/1 kraken:taxid|4932
CAAGTTTTCATCTTCAACAGCTGATTGACTTCTTTGTGGTATGCCTCGATATATTTTTCTTTTTCTTTAATATCTTTATTATAGGTGATTGCCTCATCGTA
+
IIIIIIIIIIIIIIIII
@read2
ACGTACGTACGTCCCCC
BBBBBFFFFFFFFFFFFFFF/BFFFFFFFFFFFFFFFFBFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFBF<
@SRR6357071.52260105 52260105/1 kraken:taxid|4932
TAGACTTACCAGTACCCTTTTCGACGGCGGAAACATTCAAAATACCGTTAGAGTCGACATCGAAAGTGACTTCAATTTGTGGGACACCTCTTGGAGCTGGT
+
IIIIIIIIIIIIIIIII
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/FFFFFFFFFFFFFFFF
EOF

cat > example_R2.fastq <<'EOF'
@read1
ACGTACGTACGTGGGGG
@SRR6357071.22842410 22842410/2 kraken:taxid|4932
CCGAGATCGAAGAAACGAATTCACCTGATTGCAGCTGTAAAAGCAGTAAAATCAATCAAACCAATACGGACAACCTTACGATACGATGAGGCAATCACCTA
+
IIIIIIIIIIIIIIIII
@read2
ACGTACGTACGTTTTTT
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
@SRR6357071.52260105 52260105/2 kraken:taxid|4932
GTTGATTCCAAGAAACTCTACCATTCCAACTAAGAAATCCGAAGTTTTCTCTACTTATGCTGACAACCAACCAGGTGTCTTGATTCAAGTCTTTGAAGGTG
+
IIIIIIIIIIIIIIIII
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
EOF

#################################################################

echo ">>> Testing for paired-end reads"
echo ">>> Testing for single-end reads"
"$meta_executable" \
--paired true \
--input "example_R1.fastq;example_R2.fastq" \
--adapter "ACG" \
--trimmed_fastqc_html_1 example_R1.trimmed.html \
--trimmed_fastqc_html_2 example_R2.trimmed.html \
--trimmed_fastqc_zip_1 example_R1.trimmed.zip \
--trimmed_fastqc_zip_2 example_R2.trimmed.zip \
--trimmed_r1 example_R1.trimmed.fastq \
--trimmed_r2 example_R2.trimmed.fastq \
--trimming_report_r1 example_R1.trimming_report.txt \
--trimming_report_r2 example_R2.trimming_report.txt
--paired false \
--input "example_R1.fastq" \
--trimmed_fastqc_html_1 output_se_test/example.trimmed.html \
--trimmed_fastqc_zip_1 output_se_test/example.trimmed.zip \
--trimmed_r1 output_se_test/example.trimmed.fastq \
--trimming_report_r1 output_se_test/example.trimming_report.txt \
--fastqc true \
--output_dir output_se_test

echo ">> Checking output"
assert_file_exists "example_R1.trimmed.html"
assert_file_exists "example_R2.trimmed.html"
assert_file_exists "example_R1.trimmed.zip"
assert_file_exists "example_R2.trimmed.zip"
assert_file_exists "example_R1.trimmed.fastq"
assert_file_exists "example_R2.trimmed.fastq"
assert_file_exists "example_R1.trimming_report.txt"
assert_file_exists "example_R2.trimming_report.txt"
assert_file_exists "output_se_test/example.trimmed.html"
assert_file_exists "output_se_test/example.trimmed.zip"
assert_file_exists "output_se_test/example.trimmed.fastq"
assert_file_exists "output_se_test/example.trimming_report.txt"

echo ">> Check if output is empty"
assert_file_not_empty "example_R1.trimmed.html"
assert_file_not_empty "example_R2.trimmed.html"
assert_file_not_empty "example_R1.trimmed.zip"
assert_file_not_empty "example_R2.trimmed.zip"
assert_file_not_empty "example_R1.trimmed.fastq"
assert_file_not_empty "example_R2.trimmed.fastq"
assert_file_not_empty "example_R1.trimming_report.txt"
assert_file_not_empty "example_R2.trimming_report.txt"
assert_file_not_empty "output_se_test/example.trimmed.html"
assert_file_not_empty "output_se_test/example.trimmed.zip"
assert_file_not_empty "output_se_test/example.trimmed.fastq"
assert_file_not_empty "output_se_test/example.trimming_report.txt"

echo ">> Check contents"
assert_file_contains_line "example_R1.trimmed.fastq" "TACGTACGTAAAAA"
assert_file_contains_line "example_R2.trimmed.fastq" "TACGTACGTGGGGG"
assert_file_contains "example_R1.trimming_report.txt" "sequences processed in total"
assert_file_contains "example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter than the length cutoff"
assert_file_contains "output_se_test/example.trimmed.fastq" "@SRR6357071.22842410 22842410/1"
assert_file_contains "output_se_test/example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff"

#################################################################

echo ">>> Testing for single-end reads"
echo ">>> Testing for paired-end reads"
"$meta_executable" \
--paired false \
--input "example_R1.fastq" \
--adapter "ACG" \
--trimmed_fastqc_html_1 example.trimmed.html \
--trimmed_fastqc_zip_1 example.trimmed.zip \
--trimmed_r1 example.trimmed.fastq \
--trimming_report_r1 example.trimming_report.txt \
--paired true \
--input "example_R1.fastq;example_R2.fastq" \
--trimmed_fastqc_html_1 output_pe_test/example_R1.trimmed.html \
--trimmed_fastqc_html_2 output_pe_test/example_R2.trimmed.html \
--trimmed_fastqc_zip_1 output_pe_test/example_R1.trimmed.zip \
--trimmed_fastqc_zip_2 output_pe_test/example_R2.trimmed.zip \
--trimmed_r1 output_pe_test/example_R1.trimmed.fastq \
--trimmed_r2 output_pe_test/example_R2.trimmed.fastq \
--trimming_report_r1 output_pe_test/example_R1.trimming_report.txt \
--trimming_report_r2 output_pe_test/example_R2.trimming_report.txt \
--fastqc true \
--output_dir output_pe_test

echo ">> Checking output"
assert_file_exists "example.trimmed.html"
assert_file_exists "example.trimmed.zip"
assert_file_exists "example.trimmed.fastq"
assert_file_exists "example.trimming_report.txt"
assert_file_exists "output_pe_test/example_R1.trimmed.html"
assert_file_exists "output_pe_test/example_R2.trimmed.html"
assert_file_exists "output_pe_test/example_R1.trimmed.zip"
assert_file_exists "output_pe_test/example_R2.trimmed.zip"
assert_file_exists "output_pe_test/example_R1.trimmed.fastq"
assert_file_exists "output_pe_test/example_R2.trimmed.fastq"
assert_file_exists "output_pe_test/example_R1.trimming_report.txt"
assert_file_exists "output_pe_test/example_R2.trimming_report.txt"

echo ">> Check if output is empty"
assert_file_not_empty "example.trimmed.html"
assert_file_not_empty "example.trimmed.zip"
assert_file_not_empty "example.trimmed.fastq"
assert_file_not_empty "example.trimming_report.txt"
assert_file_not_empty "output_pe_test/example_R1.trimmed.html"
assert_file_not_empty "output_pe_test/example_R2.trimmed.html"
assert_file_not_empty "output_pe_test/example_R1.trimmed.zip"
assert_file_not_empty "output_pe_test/example_R2.trimmed.zip"
assert_file_not_empty "output_pe_test/example_R1.trimmed.fastq"
assert_file_not_empty "output_pe_test/example_R2.trimmed.fastq"
assert_file_not_empty "output_pe_test/example_R1.trimming_report.txt"
assert_file_not_empty "output_pe_test/example_R2.trimming_report.txt"

echo ">> Check contents"
assert_file_contains_line "example.trimmed.fastq" "TACGTACGTAAAAA"
assert_file_contains "example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff"
assert_file_contains "output_pe_test/example_R1.trimmed.fastq" "@SRR6357071.22842410 22842410/1"
assert_file_contains "output_pe_test/example_R2.trimmed.fastq" "@SRR6357071.22842410 22842410/2"
assert_file_contains "output_pe_test/example_R1.trimming_report.txt" "sequences processed in total"
assert_file_contains "output_pe_test/example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter than the length cutoff"

#################################################################

Expand Down

0 comments on commit 24a9297

Please sign in to comment.