Skip to content

Commit

Permalink
More tests and debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
tgaspe committed Aug 29, 2024
1 parent 981f6f2 commit 8dc31c2
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 29 deletions.
21 changes: 8 additions & 13 deletions src/bcftools/bcftools_norm/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,26 +51,21 @@ argument_groups:
choices: [".", "*"]
description: |
Use the star allele (*) for overlapping alleles or set to missing (.).
default: "*"
#default: "*"

- name: --check_ref
alternatives: -c
type: string
choices: ['e', 'w', 'x', 's']
description: |
Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites.
default: "e"
#default: "e"

- name: --remove_duplicates
alternatives: -D
type: boolean_true
description: Remove duplicate lines of the same type.

- name: --rm_dup
alternatives: -d
type: string
choices: ['snps', 'indels', 'both', 'all', 'exact']
description: Remove duplicate snps, indels, both, all, or exact matches.
choices: ['snps', 'indels', 'both', 'all', 'exact', 'none']
description: Remove duplicate snps, indels, both, all, exact matches, or none (old -D option).

- name: --fasta_ref
alternatives: -f
Expand Down Expand Up @@ -134,14 +129,14 @@ argument_groups:
choices: ['pos', 'record', 'variant', '1', '2', '3']
description: |
Include if POS in the region (0), record overlaps (1), variant overlaps (2).
default: "1"
#default: "1"

- name: --site_win
alternatives: -w
type: integer
description: |
Buffer for sorting lines that changed position during realignment.
default: 1000
#default: 1000

- name: --strict_filter
alternatives: -s
Expand All @@ -163,13 +158,13 @@ argument_groups:
choices: ['pos', 'record', 'variant', '1', '2', '3']
description: |
Include if POS in the region (0), record overlaps (1), variant overlaps (2).
default: "0"
#default: "0"

- name: --threads
type: integer
description: |
Use multithreading with the specified number of worker threads.
default: 0
#default: 0


resources:
Expand Down
23 changes: 11 additions & 12 deletions src/bcftools/bcftools_norm/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,22 @@ done
bcftools norm \
${par_atomize:+--atomize} \
${par_atom_overlaps:+--atom-overlaps "$par_atom_overlaps"} \
${par_check_ref:+--check-ref "$par_check_ref"} \
${par_remove_duplicates:+--remove-duplicates} \
${par_rm_dup:+--rm-dup "$par_rm_dup"} \
${par_fasta_ref:+--fasta-ref "$par_fasta_ref"} \
${par_check_ref:+-c "$par_check_ref"} \
${par_remove_duplicates:+-d "$par_remove_duplicates"} \
${par_fasta_ref:+-f "$par_fasta_ref"} \
${par_force:+--force} \
${par_keep_sum:+--keep-sum "$par_keep_sum"} \
${par_multiallelics:+--multiallelics "$par_multiallelics"} \
${par_multiallelics:+-m "$par_multiallelics"} \
${par_no_version:+--no-version} \
${par_do_not_normalize:+--do-not-normalize} \
${par_do_not_normalize:+-N} \
${par_old_rec_tag:+--old-rec-tag "$par_old_rec_tag"} \
${par_regions:+--regions "$par_regions"} \
${par_regions_file:+--regions-file "$par_regions_file"} \
${par_regions:+-r "$par_regions"} \
${par_regions_file:+-R "$par_regions_file"} \
${par_regions_overlap:+--regions-overlap "$par_regions_overlap"} \
${par_site_win:+--site-win "$par_site_win"} \
${par_strict_filter:+--strict-filter} \
${par_targets:+--targets "$par_targets"} \
${par_targets_file:+--targets-file "$par_targets_file"} \
${par_site_win:+-w "$par_site_win"} \
${par_strict_filter:+-s} \
${par_targets:+-t "$par_targets"} \
${par_targets_file:+-T "$par_targets_file"} \
${par_targets_overlap:+--targets-overlap "$par_targets_overlap"} \
${par_threads:+--threads "$par_threads"} \
${par_output_type:+-O "$par_output_type"} \
Expand Down
71 changes: 67 additions & 4 deletions src/bcftools/bcftools_norm/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,31 +38,94 @@ cat <<EOF > "$TMPDIR/example.vcf"
##fileformat=VCFv4.1
##contig=<ID=1,length=249250621,assembly=b37>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
1 752567 llama A C . . . . .
1 752722 . G A . . . . .
1 752567 llama G C,A . . . . 1/2
1 752722 . G A,AAA . . . . ./.
EOF

bgzip -c $TMPDIR/example.vcf > $TMPDIR/example.vcf.gz
tabix -p vcf $TMPDIR/example.vcf.gz

cat <<EOF > "$TMPDIR/reference.fa"
>1
ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG
>2
CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
EOF

# Test 1: Remove ID annotations
mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null

echo "> Run bcftools_norm"
"$meta_executable" \
--input "../example.vcf" \
--output "normalized.vcf" \
--atomize \
--atom_overlaps "." \

# checks
assert_file_exists "normalized.vcf"
assert_file_not_empty "normalized.vcf"
assert_file_contains "normalized.vcf" "##fileformat=VCFv4.2"
assert_file_contains "normalized.vcf" "bcftools_normCommand=norm --atomize --atom-overlaps . -o normalized.vcf ../example.vcf"
echo "- test1 succeeded -"

popd > /dev/null

# Test 2: Check reference
mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null

echo "> Run bcftools_norm with remove duplicates"
"$meta_executable" \
--input "../example.vcf" \
--output "normalized.vcf" \
--atomize \
--remove_duplicates 'all' \

# checks
assert_file_exists "normalized.vcf"
assert_file_not_empty "normalized.vcf"
assert_file_contains "normalized.vcf" "norm --atomize -d all -o normalized.vcf ../example.vcf"
echo "- test2 succeeded -"

popd > /dev/null

# Test 3: Check reference and fasta reference
mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null

echo "> Run bcftools_norm with check reference and fasta reference"
"$meta_executable" \
--input "../example.vcf" \
--output "normalized.vcf" \
--atomize \
--fasta_ref "../reference.fa" \
--check_ref "e" \

# checks
assert_file_exists "normalized.vcf"
assert_file_not_empty "normalized.vcf"
assert_file_contains "normalized.vcf" "norm --atomize -c e -f ../reference.fa -o normalized.vcf ../example.vcf"
echo "- test3 succeeded -"

popd > /dev/null

# Test 4: Multiallelics
mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null

echo "> Run bcftools_norm with multiallelics"
"$meta_executable" \
--input "../example.vcf" \
--output "normalized.vcf" \
--multiallelics "-any" \
--old_rec_tag "wazzaaa" \

# checks
assert_file_exists "normalized.vcf"
assert_file_not_empty "normalized.vcf"
assert_file_contains "normalized.vcf" "norm -m -any --old-rec-tag wazzaaa -o normalized.vcf ../example.vcf"
echo "- test4 succeeded -"

echo "---- All tests succeeded! ----"
exit 0


# echo
# cat "normalized.vcf"
# echo

0 comments on commit 8dc31c2

Please sign in to comment.