From 8dc31c2c8b9c2739465641505b2a00f665eaa276 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Thu, 29 Aug 2024 11:11:37 +0200 Subject: [PATCH] More tests and debugging --- src/bcftools/bcftools_norm/config.vsh.yaml | 21 +++---- src/bcftools/bcftools_norm/script.sh | 23 ++++--- src/bcftools/bcftools_norm/test.sh | 71 ++++++++++++++++++++-- 3 files changed, 86 insertions(+), 29 deletions(-) diff --git a/src/bcftools/bcftools_norm/config.vsh.yaml b/src/bcftools/bcftools_norm/config.vsh.yaml index 8fdd1e46..22f4b612 100644 --- a/src/bcftools/bcftools_norm/config.vsh.yaml +++ b/src/bcftools/bcftools_norm/config.vsh.yaml @@ -51,7 +51,7 @@ argument_groups: choices: [".", "*"] description: | Use the star allele (*) for overlapping alleles or set to missing (.). - default: "*" + #default: "*" - name: --check_ref alternatives: -c @@ -59,18 +59,13 @@ argument_groups: choices: ['e', 'w', 'x', 's'] description: | Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites. - default: "e" + #default: "e" - name: --remove_duplicates - alternatives: -D - type: boolean_true - description: Remove duplicate lines of the same type. - - - name: --rm_dup alternatives: -d type: string - choices: ['snps', 'indels', 'both', 'all', 'exact'] - description: Remove duplicate snps, indels, both, all, or exact matches. + choices: ['snps', 'indels', 'both', 'all', 'exact', 'none'] + description: Remove duplicate snps, indels, both, all, exact matches, or none (old -D option). - name: --fasta_ref alternatives: -f @@ -134,14 +129,14 @@ argument_groups: choices: ['pos', 'record', 'variant', '1', '2', '3'] description: | Include if POS in the region (0), record overlaps (1), variant overlaps (2). - default: "1" + #default: "1" - name: --site_win alternatives: -w type: integer description: | Buffer for sorting lines that changed position during realignment. 
- default: 1000 + #default: 1000 - name: --strict_filter alternatives: -s @@ -163,13 +158,13 @@ argument_groups: choices: ['pos', 'record', 'variant', '1', '2', '3'] description: | Include if POS in the region (0), record overlaps (1), variant overlaps (2). - default: "0" + #default: "0" - name: --threads type: integer description: | Use multithreading with the specified number of worker threads. - default: 0 + #default: 0 resources: diff --git a/src/bcftools/bcftools_norm/script.sh b/src/bcftools/bcftools_norm/script.sh index 0f52fc54..2a9637b1 100644 --- a/src/bcftools/bcftools_norm/script.sh +++ b/src/bcftools/bcftools_norm/script.sh @@ -25,23 +25,22 @@ done bcftools norm \ ${par_atomize:+--atomize} \ ${par_atom_overlaps:+--atom-overlaps "$par_atom_overlaps"} \ - ${par_check_ref:+--check-ref "$par_check_ref"} \ - ${par_remove_duplicates:+--remove-duplicates} \ - ${par_rm_dup:+--rm-dup "$par_rm_dup"} \ - ${par_fasta_ref:+--fasta-ref "$par_fasta_ref"} \ + ${par_check_ref:+-c "$par_check_ref"} \ + ${par_remove_duplicates:+-d "$par_remove_duplicates"} \ + ${par_fasta_ref:+-f "$par_fasta_ref"} \ ${par_force:+--force} \ ${par_keep_sum:+--keep-sum "$par_keep_sum"} \ - ${par_multiallelics:+--multiallelics "$par_multiallelics"} \ + ${par_multiallelics:+-m "$par_multiallelics"} \ ${par_no_version:+--no-version} \ - ${par_do_not_normalize:+--do-not-normalize} \ + ${par_do_not_normalize:+-N} \ ${par_old_rec_tag:+--old-rec-tag "$par_old_rec_tag"} \ - ${par_regions:+--regions "$par_regions"} \ - ${par_regions_file:+--regions-file "$par_regions_file"} \ + ${par_regions:+-r "$par_regions"} \ + ${par_regions_file:+-R "$par_regions_file"} \ ${par_regions_overlap:+--regions-overlap "$par_regions_overlap"} \ - ${par_site_win:+--site-win "$par_site_win"} \ - ${par_strict_filter:+--strict-filter} \ - ${par_targets:+--targets "$par_targets"} \ - ${par_targets_file:+--targets-file "$par_targets_file"} \ + ${par_site_win:+-w "$par_site_win"} \ + ${par_strict_filter:+-s} \ + 
${par_targets:+-t "$par_targets"} \ + ${par_targets_file:+-T "$par_targets_file"} \ ${par_targets_overlap:+--targets-overlap "$par_targets_overlap"} \ ${par_threads:+--threads "$par_threads"} \ ${par_output_type:+-O "$par_output_type"} \ diff --git a/src/bcftools/bcftools_norm/test.sh b/src/bcftools/bcftools_norm/test.sh index 0f00cbf1..150aab9f 100644 --- a/src/bcftools/bcftools_norm/test.sh +++ b/src/bcftools/bcftools_norm/test.sh @@ -38,13 +38,20 @@ cat < "$TMPDIR/example.vcf" ##fileformat=VCFv4.1 ##contig= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 -1 752567 llama A C . . . . . -1 752722 . G A . . . . . +1 752567 llama G C,A . . . . 1/2 +1 752722 . G A,AAA . . . . ./. EOF bgzip -c $TMPDIR/example.vcf > $TMPDIR/example.vcf.gz tabix -p vcf $TMPDIR/example.vcf.gz +cat < "$TMPDIR/reference.fa" +>1 +ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG +>2 +CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT +EOF + # Test 1: Remove ID annotations mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null @@ -52,17 +59,73 @@ echo "> Run bcftools_norm" "$meta_executable" \ --input "../example.vcf" \ --output "normalized.vcf" \ + --atomize \ + --atom_overlaps "." \ # checks assert_file_exists "normalized.vcf" assert_file_not_empty "normalized.vcf" -assert_file_contains "normalized.vcf" "##fileformat=VCFv4.2" +assert_file_contains "normalized.vcf" "bcftools_normCommand=norm --atomize --atom-overlaps . 
-o normalized.vcf ../example.vcf" echo "- test1 succeeded -" popd > /dev/null +# Test 2: Remove duplicates +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bcftools_norm with remove duplicates" +"$meta_executable" \ + --input "../example.vcf" \ + --output "normalized.vcf" \ + --atomize \ + --remove_duplicates 'all' \ + +# checks +assert_file_exists "normalized.vcf" +assert_file_not_empty "normalized.vcf" +assert_file_contains "normalized.vcf" "norm --atomize -d all -o normalized.vcf ../example.vcf" +echo "- test2 succeeded -" + +popd > /dev/null + +# Test 3: Check reference and fasta reference +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null + +echo "> Run bcftools_norm with check reference and fasta reference" +"$meta_executable" \ + --input "../example.vcf" \ + --output "normalized.vcf" \ + --atomize \ + --fasta_ref "../reference.fa" \ + --check_ref "e" \ + +# checks +assert_file_exists "normalized.vcf" +assert_file_not_empty "normalized.vcf" +assert_file_contains "normalized.vcf" "norm --atomize -c e -f ../reference.fa -o normalized.vcf ../example.vcf" +echo "- test3 succeeded -" + +popd > /dev/null + +# Test 4: Multiallelics +mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null + +echo "> Run bcftools_norm with multiallelics" +"$meta_executable" \ + --input "../example.vcf" \ + --output "normalized.vcf" \ + --multiallelics "-any" \ + --old_rec_tag "wazzaaa" \ + +# checks +assert_file_exists "normalized.vcf" +assert_file_not_empty "normalized.vcf" +assert_file_contains "normalized.vcf" "norm -m -any --old-rec-tag wazzaaa -o normalized.vcf ../example.vcf" +echo "- test4 succeeded -" echo "---- All tests succeeded! ----" exit 0 - +# echo +# cat "normalized.vcf" +# echo