From c9877866aebc29fa0949f4cf0ecec847f8bfc15c Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Sat, 16 Mar 2024 13:29:39 +0100 Subject: [PATCH] Refined tests --- src/gffread/config.vsh.yaml | 3 +- src/gffread/script.sh | 4 +- src/gffread/test.sh | 42 +++++++++++++++------ src/gffread/test_data/output/annotation.tbl | 4 ++ 4 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 src/gffread/test_data/output/annotation.tbl diff --git a/src/gffread/config.vsh.yaml b/src/gffread/config.vsh.yaml index 4b10660a..4ac2932d 100644 --- a/src/gffread/config.vsh.yaml +++ b/src/gffread/config.vsh.yaml @@ -85,10 +85,11 @@ argument_groups: - name: --spliced_exons alternatives: -w type: file + direction: output must_exist: false - example: exons.fa description: | Write a fasta file with spliced exons for each transcript. + example: exons.fa - name: --w_add type: integer description: | diff --git a/src/gffread/script.sh b/src/gffread/script.sh index 4b50406b..47a45267 100644 --- a/src/gffread/script.sh +++ b/src/gffread/script.sh @@ -7,10 +7,12 @@ [[ "$par_coding" == "false" ]] && unset par_coding [[ "$par_strict_range" == "false" ]] && unset par_strict_range [[ "$par_no_single_exon" == "false" ]] && unset par_no_single_exon +[[ "$par_no_exon_attrs" == "false" ]] && unset par_no_exon_attrs [[ "$par_nc" == "false" ]] && unset par_nc [[ "$par_ignore_locus" == "false" ]] && unset par_ignore_locus [[ "$par_description" == "false" ]] && unset par_description [[ "$par_sort_alpha" == "false" ]] && unset par_sort_alpha +[[ "$par_keep_genes" == "false" ]] && unset par_keep_genes [[ "$par_keep_attrs" == "false" ]] && unset par_keep_attrs [[ "$par_keep_exon_attrs" == "false" ]] && unset par_keep_exon_attrs [[ "$par_keep_comments" == "false" ]] && unset par_keep_comments @@ -61,7 +63,7 @@ gffread \ ${par_merge_exons:+-Z} \ ${par_genome:+-g "$par_genome"} \ ${par_junctions:+-j} \ - ${par_spliced_exons:+-w "$par_spliced_exon"} \ + ${par_spliced_exons:+-w "$par_spliced_exons"} \ 
${par_w_add:+--w-add "$par_w_add"} \ ${par_w_nocds:+--w-nocds} \ ${par_spliced_cds:+-x "$par_spliced_cds"} \ diff --git a/src/gffread/test.sh b/src/gffread/test.sh index 6593a035..698a6d7f 100755 --- a/src/gffread/test.sh +++ b/src/gffread/test.sh @@ -5,9 +5,11 @@ set -e -test_output_dir="$meta_resources_dir/test_data/test_output/" -test_dir="$meta_resources_dir/test_data/" -expected_output_dir="$meta_resources_dir/test_data/output/" +test_output_dir="${meta_resources_dir}test_data/test_output" +test_dir="${meta_resources_dir}test_data" +expected_output_dir="${meta_resources_dir}test_data/output" + +mkdir -p "$test_output_dir" ################################################################################ @@ -15,9 +17,9 @@ expected_output_dir="$meta_resources_dir/test_data/output/" echo "> Test 1 - Read annotation file, output GFF" "$meta_executable" \ - --expose_dups \ - --outfile "$test_output_dir/ann_simple.gff" \ - --input "$test_dir/annotation.gff" + -E \ + --input "$test_dir/annotation.gff" \ + -o "$test_output_dir/ann_simple.gff" echo ">> Check if output exists" [ ! -f "$test_output_dir/ann_simple.gff" ] \ @@ -28,9 +30,14 @@ echo ">> Check if output is empty" && echo "Output file test_output/ann_simple.gff is empty" && exit 1 echo ">> Compare output to expected output" -diff "$expected_output_dir/ann_simple.gff" "$test_output_dir/ann_simple.gff" || \ + +# Not comparing header lines, they are not in the same order and reference file +# was created with a different version of gffread. 
+diff <(grep -v '^#' "$expected_output_dir/ann_simple.gff") \ + <(grep -v '^#' "$test_output_dir/ann_simple.gff") || \ (echo "Output file ann_simple.gff does not match expected output" && exit 1) + ################################################################################ echo "> Test 2 - Read annotation file, output GTF" @@ -49,6 +56,13 @@ echo ">> Check if output is empty" && echo "Output file test_output/annotation.gtf is empty" && exit 1 echo ">> Compare output to expected output" + +# remove trailing semicolons from the files +# Difference in trailing semicolon presence might be due to reference output +# being generated by gffread version 11.8 instead of 12.7. +sed -i 's/;$/''/' "$expected_output_dir/annotation.gtf" +sed -i 's/;$/''/' "$test_output_dir/annotation.gtf" + diff "$expected_output_dir/annotation.gtf" "$test_output_dir/annotation.gtf" || \ (echo "Output file annotation.gtf does not match expected output" && exit 1) @@ -56,18 +70,19 @@ diff "$expected_output_dir/annotation.gtf" "$test_output_dir/annotation.gtf" || echo "> Test 3 - Generate fasta file from annotation file" + "$meta_executable" \ + --genome "$test_dir/genome.fa" \ --spliced_exons "$test_output_dir/transcripts.fa" \ - --genome "genome.fa" \ - --input "annotation.gff" + --input "$test_dir/annotation.gff" echo ">> Check if output exists" [ ! -f "$test_output_dir/transcripts.fa" ] \ - && echo "Output file test_output/transcripts.fa does not exist" && exit 1 + && echo "Output file transcripts.fa does not exist" && exit 1 echo ">> Check if output is empty" [ ! 
-s "$test_output_dir/transcripts.fa" ] \ - && echo "Output file test_output/transcripts.fa is empty" && exit 1 + && echo "Output file transcripts.fa is empty" && exit 1 echo ">> Compare output to expected output" diff "$expected_output_dir/transcripts.fa" "$test_output_dir/transcripts.fa" || \ @@ -80,7 +95,7 @@ echo "> Test 4 - Generate table from GFF annotation file" "$meta_executable" \ --table @id,@chr,@start,@end,@strand,@exons,Name,gene,product \ --outfile "$test_output_dir/annotation.tbl" \ - --input "annotation.gff" + --input "$test_dir/annotation.gff" echo ">> Check if output exists" [ ! -f "$test_output_dir/annotation.tbl" ] \ @@ -94,6 +109,9 @@ echo ">> Compare output to expected output" diff "$expected_output_dir/annotation.tbl" "$test_output_dir/annotation.tbl" || \ (echo "Output file annotation.tbl does not match expected output" && exit 1) +# reference output annotation.tbl was created manually since it was not included +# in the original test data. + ################################################################################ echo "> All tests successful" diff --git a/src/gffread/test_data/output/annotation.tbl b/src/gffread/test_data/output/annotation.tbl new file mode 100644 index 00000000..d707d61a --- /dev/null +++ b/src/gffread/test_data/output/annotation.tbl @@ -0,0 +1,4 @@ +rna157470 NT_187562.1 411 68627 + 411-495,1995-2051,2602-2726,9665-9753,12115-12164,12577-12744,14174-14326,14664-14864,16634-16788,17438-17606,17880-17976,18710-18822,20083-20208,21352-21480,21736-21846,22191-22253,24448-24582,25379-25466,26264-26402,27215-27348,56500-56534,56627-56743,57445-57593,58211-58295,59473-59529,61493-61617,62682-62770,64510-64559,65149-65319,67694-68627 NM_004668.2 MGAM maltase-glucoamylase +rna157473 NT_187562.1 214038 219958 - 214038-214270,214372-214511,216955-217211,217435-217573,219568-219648,219891-219958 NM_001001317.4 PRSS58 protease%2C serine 58 +rna157474 NT_187562.1 230181 234148 - 230181-231784,233047-233200,233852-234148 
NR_036483.2 TRY2P trypsinogen-like pseudogene +rna157497 NT_187562.1 962573 963937 + 962573-962821,963419-963937 NM_178829.4 C7orf34 chromosome 7 open reading frame 34