Skip to content

Commit

Permalink
debugging and adding tests
Browse files Browse the repository at this point in the history
  • Loading branch information
tgaspe committed Aug 27, 2024
1 parent 254e49c commit ec665bc
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 47 deletions.
24 changes: 19 additions & 5 deletions src/bcftools/bcftools_stats/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ argument_groups:
alternatives: --i
type: string
description: |
Select sites for which the expression is true (see man page for details)
Select sites for which the expression is true.
See https://samtools.github.io/bcftools/bcftools.html#expressions for details.
- name: --split_by_ID
alternatives: --I
Expand All @@ -129,8 +130,13 @@ argument_groups:
- name: --regions_overlap
type: string
choices: ['pos', 'record', 'variant', '0', '1', '2']
description: |
Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1].
This option controls how overlapping records are determined:
set to 'pos' or '0' if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T);
set to 'record' or '1' if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R,
and includes indels with POS at the end of a region, which are technically outside the region);
or set to 'variant' or '2' to include only true overlapping variation (compare the full VCF representation "TA>T-" vs the true sequence variation "A>-").
- name: --samples
alternatives: --s
Expand All @@ -148,7 +154,14 @@ argument_groups:
alternatives: --t
type: string
description: |
Similar to -r but streams rather than index-jumps.
Similar to -r, --regions, but the next position is accessed by streaming the whole VCF/BCF
rather than using the tbi/csi index. Both -r and -t options can be applied simultaneously: -r uses the
index to jump to a region and -t discards positions which are not in the targets. Unlike -r, targets
can be prefixed with "^" to request logical complement. For example, "^X,Y,MT" indicates that
sequences X, Y and MT should be skipped. Yet another difference between the -t/-T and -r/-R is
that -r/-R checks for proper overlaps and considers both POS and the end position of an indel,
while -t/-T considers the POS coordinate only (by default; see also --regions-overlap and --targets-overlap).
Note that -t cannot be used in combination with -T.
- name: --targets_file
alternatives: --T
Expand All @@ -157,9 +170,10 @@ argument_groups:
Similar to -R but streams rather than index-jumps.
- name: --targets_overlaps
type: file
type: string
choices: ['pos', 'record', 'variant', '0', '1', '2']
description: |
Include if POS in the region (0), record overlaps (1), variant overlaps (2) [0].
Include if POS in the region (0), record overlaps (1), variant overlaps (2).
- name: --user_tstv
alternatives: --u
Expand Down
84 changes: 42 additions & 42 deletions src/bcftools/bcftools_stats/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ cat <<EOF > "$TMPDIR/example.vcf"
20 1234567 microsat1 G GA,GAC 50 PASS NS=3;DP=9;AA=G;AN=6;AC=3,1 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3
EOF

# bgzip -c "$TMPDIR/example.vcf" > "$TMPDIR/example.vcf.gz"

cat <<EOF > "$TMPDIR/exons.bed"
chr19 12345 12567
chr20 23456 23789
Expand Down Expand Up @@ -162,7 +164,7 @@ echo "- test4 succeeded -"

popd > /dev/null

# Test 5: Exons, Apply Filters, Fasta Reference
# Test 5: Exons, Apply Filters
mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null

echo "> Run bcftools_stats on VCF file with exons, apply filters, and fasta reference"
Expand All @@ -181,61 +183,59 @@ echo "- test5 succeeded -"

popd > /dev/null

# Test 6: Include, Regions, Regions File
# Test 6: Include, Regions
mkdir "$TMPDIR/test6" && pushd "$TMPDIR/test6" > /dev/null

echo "> Run bcftools_stats on VCF file with include, regions, and regions file"
echo "> Run bcftools_stats on VCF file with include, regions"
"$meta_executable" \
--input "../example.vcf" \
--output "stats.txt" \
--include "GT='mis'" \
--regions "20" \
# --regions "19" \

# checks
assert_file_exists "stats.txt"
assert_file_not_empty "stats.txt"
assert_file_contains "stats.txt" "number of records: 8"
assert_file_contains "stats.txt" "bcftools stats -i GT='mis' ../example.vcf"
echo "- test6 succeeded -"

popd > /dev/null

# # Test 7: Regions Overlap, Samples, Samples File
# mkdir "$TMPDIR/test7" && pushd "$TMPDIR/test7" > /dev/null

# echo "> Run bcftools_stats on VCF file with regions overlap, samples, and samples file"
# "$meta_executable" \
# --input "../example.vcf" \
# --output "stats.txt" \
# --regions_overlap "20:1000000-2000000" \
# --samples "NA00001,NA00002" \
# --samples_file "samples.txt" \

# # checks
# assert_file_exists "stats.txt"
# assert_file_not_empty "stats.txt"
# assert_file_contains "stats.txt" "number of records: 8"
# echo "- test7 succeeded -"

# popd > /dev/null

# # Test 8: Targets, Targets File, Targets Overlaps
# mkdir "$TMPDIR/test8" && pushd "$TMPDIR/test8" > /dev/null

# echo "> Run bcftools_stats on VCF file with targets, targets file, and targets overlaps"
# "$meta_executable" \
# --input "../example.vcf" \
# --output "stats.txt" \
# --targets "20:1000000-2000000" \
# --targets_file "targets.bed" \
# --targets_overlaps "20:1000000-2000000" \

# # checks
# assert_file_exists "stats.txt"
# assert_file_not_empty "stats.txt"
# assert_file_contains "stats.txt" "number of records: 8"
# echo "- test8 succeeded -"

# popd > /dev/null
# Test 7: Regions Overlap, Samples
# Runs the component with --regions_overlap and --samples and verifies that
# stats.txt contains the expected bcftools invocation with both options.
# No samples file is passed here, so the messages do not mention one.
mkdir "$TMPDIR/test7" && pushd "$TMPDIR/test7" > /dev/null

echo "> Run bcftools_stats on VCF file with regions overlap and samples"
# The last argument carries no trailing backslash, so the command ends here
# regardless of what follows it.
"$meta_executable" \
  --input "../example.vcf" \
  --output "stats.txt" \
  --regions_overlap "record" \
  --samples "NA00001,NA00002"

# checks
assert_file_exists "stats.txt"
assert_file_not_empty "stats.txt"
assert_file_contains "stats.txt" "bcftools stats --regions-overlap record -s NA00001,NA00002 ../example.vcf"
echo "- test7 succeeded -"

popd > /dev/null

# Test 8: Targets, Targets Overlaps
# Runs the component with --targets and --targets_overlaps and verifies that
# stats.txt contains the expected bcftools invocation with both options.
# No targets file is passed here, so the messages do not mention one.
mkdir "$TMPDIR/test8" && pushd "$TMPDIR/test8" > /dev/null

echo "> Run bcftools_stats on VCF file with targets and targets overlaps"
# The last argument carries no trailing backslash, so the command ends here
# regardless of what follows it.
"$meta_executable" \
  --input "../example.vcf" \
  --output "stats.txt" \
  --targets "20:1000000-2000000" \
  --targets_overlaps "pos"

# checks
assert_file_exists "stats.txt"
assert_file_not_empty "stats.txt"
assert_file_contains "stats.txt" "bcftools stats -t 20:1000000-2000000 --targets-overlap pos ../example.vcf"
echo "- test8 succeeded -"

popd > /dev/null

echo "---- All tests succeeded! ----"
exit 0
Expand Down

0 comments on commit ec665bc

Please sign in to comment.