Skip to content

Commit

Permalink
test data files and changes to script
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed May 22, 2024
1 parent 2e227e4 commit 13e8703
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 26 deletions.
14 changes: 6 additions & 8 deletions src/umi_tools/umi_tools_dedup/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ argument_groups:
- name: --bai
type: file
description: BAM index
- name: --get_output_stats
type: boolean_true
description: Generate output stats.
- name: --random_seed
type: integer
description: |
Expand All @@ -55,9 +52,10 @@ argument_groups:
use of the template length to determine reads with the same mapping
coordinates.
- name: --output_stats
type: file
description: Directory containing UMI based deduplication statistics files
direction: output
type: string
description: |
Generate files containing UMI-based deduplication statistics, using this value as the
prefix of the file names.
- name: --extract_umi_method
type: string
description: |
Expand Down Expand Up @@ -228,7 +226,7 @@ argument_groups:
description: |
Ignore the UMI and group reads using mapping coordinates only.
- name: --subset
type: boolean_true
type: double
description: |
Only consider a fraction of the reads, chosen at random. This is useful
for doing saturation analyses.
Expand Down Expand Up @@ -269,7 +267,7 @@ argument_groups:
alternatives: -v
type: integer
description: Log level. The higher, the more output.
default: 1
default: 0
- name: --error
alternatives: -E
type: file
Expand Down
4 changes: 1 addition & 3 deletions src/umi_tools/umi_tools_dedup/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ test_dir="${metal_executable}/test_data"
[[ "$par_ignore_umi" == "false" ]] && unset par_ignore_umi
[[ "$par_subset" == "false" ]] && unset par_subset
[[ "$par_log2stderr" == "false" ]] && unset par_log2stderr
[[ "$par_get_output_stats" == "false" ]] && unset par_get_output_stats
[[ "$par_read_length" == "false" ]] && unset par_read_length

umi_tools dedup \
--stdin "$par_input" \
${par_in_sam:+--in-sam} \
${par_get_output_stats:+--get-output-stats} \
-S "$par_output" \
${par_out_sam:+--out-sam} \
${par_paired:+--paired} \
Expand Down Expand Up @@ -56,7 +54,7 @@ umi_tools dedup \
${par_chimeric_pairs:+--chimeric-pairs "$par_chimeric_pairs"} \
${par_unapired_reads:+--unapired-reads "$par_unapired_reads"} \
${par_ignore_umi:+--ignore-umi} \
${par_subset:+--subset} \
${par_subset:+--subset "$par_subset"} \
${par_chrom:+--chrom "$par_chrom"} \
${par_no_sort_output:+--no-sort-output} \
${par_buffer_whole_contig:+--buffer-whole-contig} \
Expand Down
42 changes: 27 additions & 15 deletions src/umi_tools/umi_tools_dedup/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,51 @@ echo ">>> Test 1: Basic usage of $meta_functionality_name with statistics output
--paired \
--input "$test_dir/sample.bam" \
--bai "$test_dir/sample.bam.bai" \
--output "$out_dir/deduped.bam" \
--output_stats deduped \
--output "$out_dir/deduped.sam" \
--out_sam \
--output_stats "$out_dir/dedup" \
--random_seed 1

echo ">>> Checking whether output exists"
[ ! -f "$out_dir/deduped.bam" ] && echo "File 'deduped.bam' does not exist!" && exit 1
[ ! -f "$out_dir/deduped_edit_distance.tsv" ] && echo "File 'deduped_edit_distance.tsv' does not exist!" && exit 1
[ ! -f "$out_dir/deduped.sam" ] && echo "File 'deduped.sam' does not exist!" && exit 1
[ ! -f "$out_dir/dedup_edit_distance.tsv" ] && echo "File 'dedup_edit_distance.tsv' does not exist!" && exit 1

echo ">>> Checking whether output is non-empty"
[ ! -s "$out_dir/deduped.bam" ] && echo "File 'deduped.bam' is empty!" && exit 1
[ ! -s "$out_dir/deduped_edit_distance.tsv" ] && echo "File 'deduped_edit_distance.tsv' is empty!" && exit 1
[ ! -s "$out_dir/deduped.sam" ] && echo "File 'deduped.sam' is empty!" && exit 1
[ ! -s "$out_dir/dedup_edit_distance.tsv" ] && echo "File 'dedup_edit_distance.tsv' is empty!" && exit 1

echo ">>> Checking whether output is correct"
diff "$out_dir/deduped.bam" "$test_dir/deduped.bam" || \
(echo "Output file deduped.bam does not match expected output" && exit 1)
diff "$out_dir/deduped_edit_distance.tsv" "$test_dir/deduped_edit_distance.tsv" || \
(echo "Output file deduped_edit_distance.tsv does not match expected output" && exit 1)
diff "$out_dir/deduped.sam" "$test_dir/deduped.sam" || \
(echo "Output file deduped.sam does not match expected output" && exit 1)
diff "$out_dir/dedup_edit_distance.tsv" "$test_dir/dedup_edit_distance.tsv" || \
(echo "Output file dedup_edit_distance.tsv does not match expected output" && exit 1)

############################################################################################

echo ">>> Test 2: $meta_functionality_name"
echo ">>> Test 2: $meta_functionality_name with random subset selection"

"$meta_executable" \
--paired \
--input "$test_dir/sample.bam" \
--bai "$test_dir/sample.bam.bai" \
--output "$out_dir/deduped.bam" \
--random_seed 1 \
--output "$out_dir/deduped_fraction.sam" \
--out_sam \
--subset 0.5 \
--random_seed 1


echo ">>> Checking whether output exists"
[ ! -f "$out_dir/deduped.bam" ] && echo "File 'deduped.bam' does not exist!" && exit 1
[]
[ ! -f "$out_dir/deduped_fraction.sam" ] && echo "File 'deduped_fraction.sam' does not exist!" && exit 1

echo ">>> Checking whether output is non-empty"
[ ! -s "$out_dir/deduped_fraction.sam" ] && echo "File 'deduped_fraction.sam' is empty!" && exit 1

echo ">>> Checking whether output is correct"
diff "$out_dir/deduped_fraction.sam" "$test_dir/deduped_fraction.sam" || \
(echo "Output file deduped_fraction.sam does not match expected output" && exit 1)

############################################################################################

rm -rf "$out_dir"

echo "All tests succeeded!"
Expand Down
Binary file removed src/umi_tools/umi_tools_dedup/test_data/deduped.bam
Binary file not shown.
30 changes: 30 additions & 0 deletions src/umi_tools/umi_tools_dedup/test_data/deduped.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
@HD VN:1.0 SO:coordinate
@SQ SN:chr1 LN:197195432
@SQ SN:chr10 LN:129993255
@SQ SN:chr11 LN:121843856
@SQ SN:chr12 LN:121257530
@SQ SN:chr13 LN:120284312
@SQ SN:chr14 LN:125194864
@SQ SN:chr15 LN:103494974
@SQ SN:chr16 LN:98319150
@SQ SN:chr17 LN:95272651
@SQ SN:chr18 LN:90772031
@SQ SN:chr19 LN:61342430
@SQ SN:chr2 LN:181748087
@SQ SN:chr3 LN:159599783
@SQ SN:chr4 LN:155630120
@SQ SN:chr5 LN:152537259
@SQ SN:chr6 LN:149517037
@SQ SN:chr7 LN:152524553
@SQ SN:chr8 LN:131738871
@SQ SN:chr9 LN:124076172
@SQ SN:chrM LN:16299
@SQ SN:chrX LN:166650296
@SQ SN:chrY LN:15902555
@PG ID:Bowtie VN:1.1.2 CL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -k 1 /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam"
@PG ID:samtools PN:samtools PP:Bowtie VN:1.19.2 CL:samtools view -h example.bam
@PG ID:samtools.1 PN:samtools PP:samtools VN:1.19.2 CL:samtools view -bS -
SRR2057595.5052066_ACCGGTTTA 16 chr1 3812794 255 51M * 0 0 * * XA:i:2 MD:Z:42T2T5 NM:i:2
SRR2057595.13520751_CCAGGTTCT 16 chr1 3967622 255 20M * 0 0 * * XA:i:2 MD:Z:12A0C6 NM:i:2
SRR2057595.8901432_AGCGGTTAC 0 chr1 4369756 255 20M * 0 0 * * XA:i:2 MD:Z:1T4A13 NM:i:2
SRR2057595.1210348_ACTGGTTTC 0 chr1 4762503 255 45M * 0 0 * * XA:i:2 MD:Z:0C7A36 NM:i:2
29 changes: 29 additions & 0 deletions src/umi_tools/umi_tools_dedup/test_data/deduped_fraction.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
@HD VN:1.0 SO:coordinate
@SQ SN:chr1 LN:197195432
@SQ SN:chr10 LN:129993255
@SQ SN:chr11 LN:121843856
@SQ SN:chr12 LN:121257530
@SQ SN:chr13 LN:120284312
@SQ SN:chr14 LN:125194864
@SQ SN:chr15 LN:103494974
@SQ SN:chr16 LN:98319150
@SQ SN:chr17 LN:95272651
@SQ SN:chr18 LN:90772031
@SQ SN:chr19 LN:61342430
@SQ SN:chr2 LN:181748087
@SQ SN:chr3 LN:159599783
@SQ SN:chr4 LN:155630120
@SQ SN:chr5 LN:152537259
@SQ SN:chr6 LN:149517037
@SQ SN:chr7 LN:152524553
@SQ SN:chr8 LN:131738871
@SQ SN:chr9 LN:124076172
@SQ SN:chrM LN:16299
@SQ SN:chrX LN:166650296
@SQ SN:chrY LN:15902555
@PG ID:Bowtie VN:1.1.2 CL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -k 1 /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam"
@PG ID:samtools PN:samtools PP:Bowtie VN:1.19.2 CL:samtools view -h example.bam
@PG ID:samtools.1 PN:samtools PP:samtools VN:1.19.2 CL:samtools view -bS -
SRR2057595.4062788_ACCGGTTTA 16 chr1 3812793 255 52M * 0 0 * * XA:i:2 MD:Z:43T2T5 NM:i:2
SRR2057595.8901432_AGCGGTTAC 0 chr1 4369756 255 20M * 0 0 * * XA:i:2 MD:Z:1T4A13 NM:i:2
SRR2057595.1999468_ACTGGTTTC 0 chr1 4762503 255 45M * 0 0 * * XA:i:2 MD:Z:0C7A36 NM:i:2

0 comments on commit 13e8703

Please sign in to comment.