diff --git a/src/bedtools/bedtools_merge/config.vsh.yaml b/src/bedtools/bedtools_merge/config.vsh.yaml index d472f1d2..f330dc11 100644 --- a/src/bedtools/bedtools_merge/config.vsh.yaml +++ b/src/bedtools/bedtools_merge/config.vsh.yaml @@ -61,12 +61,11 @@ argument_groups: - name: --columns alternatives: -c - type: integer + type: string description: | Specify columns from the B file to map onto intervals in A. Default: 5. Multiple columns can be specified in a comma-delimited list. - # maybe type is string??? ask tomorrow! - name: --operation alternatives: -o @@ -103,8 +102,8 @@ argument_groups: type: string description: | Specify a custom delimiter for the collapse operations. - - Example: -delim "|" - - Default: ",". + example: "|" + default: "," - name: --precision alternatives: -prec diff --git a/src/bedtools/bedtools_merge/script.sh b/src/bedtools/bedtools_merge/script.sh index f7184543..230a182c 100644 --- a/src/bedtools/bedtools_merge/script.sh +++ b/src/bedtools/bedtools_merge/script.sh @@ -19,7 +19,7 @@ bedtools merge \ ${par_distance:+-d "$par_distance"} \ ${par_columns:+-c "$par_columns"} \ ${par_operation:+-o "$par_operation"} \ - ${par_delimeter:+-delim "$par_delimeter"} \ + ${par_delimiter:+-delim "$par_delimiter"} \ ${par_precision:+-prec "$par_precision"} \ -i "$par_input" \ > "$par_output" diff --git a/src/bedtools/bedtools_merge/test.sh b/src/bedtools/bedtools_merge/test.sh index e4f7594c..3ed0e7dd 100644 --- a/src/bedtools/bedtools_merge/test.sh +++ b/src/bedtools/bedtools_merge/test.sh @@ -35,14 +35,16 @@ mkdir -p test_data # Create and populate example files printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/featureA.bed" printf "chr1\t100\t200\ta1\t1\t+\nchr1\t180\t250\ta2\t2\t+\nchr1\t250\t500\ta3\t3\t-\nchr1\t501\t1000\ta4\t4\t+\n" > "test_data/featureB.bed" +printf "chr1\t100\t200\ta1\t1.9\t+\nchr1\t180\t250\ta2\t2.5\t+\nchr1\t250\t500\ta3\t3.3\t-\nchr1\t501\t1000\ta4\t4\t+\n" > "test_data/feature_precision.bed" -# Create 
and populate example.gff file -printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/expected_sorted.gff" -printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/expected_sorted.gff" -printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/expected_sorted.gff" -printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/expected_sorted.gff" -printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/expected_sorted.gff" -printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/expected_sorted.gff" +# Create and populate feature.gff file +printf "##gff-version 3\n" > "test_data/feature.gff" +printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/feature.gff" +printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/feature.gff" +printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/feature.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/feature.gff" +printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/feature.gff" +printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/feature.gff" # Create expected output files printf "chr1\t100\t250\nchr1\t300\t400\n" > "test_data/expected.bed" @@ -50,6 +52,10 @@ printf "chr1\t100\t250\nchr1\t250\t500\nchr1\t501\t1000\n" > "test_data/expected printf "chr1\t100\t250\nchr1\t501\t1000\n" > "test_data/expected_specific_strand.bed" printf "chr1\t128\t228\nchr1\t428\t528\n" > "test_data/expected_bam.bed" printf "chr1\t100\t400\n" > "test_data/expected_distance.bed" +printf "chr1\t100\t500\t2\t1\t3\nchr1\t501\t1000\t4\t4\t4\n" > "test_data/expected_operation.bed" +printf "chr1\t100\t500\ta1|a2|a3\nchr1\t501\t1000\ta4\n" > "test_data/expected_delim.bed" 
+printf "chr1\t100\t500\t2.567\nchr1\t501\t1000\t4\n" > "test_data/expected_precision.bed" +printf "##gff-version 3\nchr1\t999\t2000\nchr2\t1499\t1700\nchr3\t999\t2000\n" > "test_data/expected_header.bed" # Test 1: Default sort on BED file mkdir test1 @@ -140,24 +146,82 @@ echo "- test5 succeeded -" cd .. -# Test 6: columns option -# mkdir test6 -# cd test6 +# Test 6: columns option & operation option +mkdir test6 +cd test6 -# echo "> Run bedtools_merge on BED file with columns option" -# "$meta_executable" \ -# --input "../test_data/featureA.bed" \ -# --output "output.bed" \ -# --columns 2 +echo "> Run bedtools_merge on BED file with columns & operation options" +"$meta_executable" \ + --input "../test_data/featureB.bed" \ + --output "output.bed" \ + --columns 5 \ + --operation "mean,min,max" -# # checks -# assert_file_exists "output.bed" -# assert_file_not_empty "output.bed" -# assert_identical_content "output.bed" "../test_data/expected.bed" -# echo "- test6 succeeded -" +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_operation.bed" +echo "- test6 succeeded -" + +cd .. -# cd .. +# Test 8: delimiter option +mkdir test8 +cd test8 +echo "> Run bedtools_merge on BED file with delimeter option" +"$meta_executable" \ + --input "../test_data/featureB.bed" \ + --output "output.bed" \ + --columns 4 \ + --operation "collapse" \ + --delimiter "|" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_delim.bed" +echo "- test8 succeeded -" + +cd .. 
+ +# Test 9: precision option +mkdir test9 +cd test9 + +echo "> Run bedtools_merge on BED file with precision option" +"$meta_executable" \ + --input "../test_data/feature_precision.bed" \ + --output "output.bed" \ + --columns 5 \ + --operation "mean" \ + --precision 4 + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_precision.bed" +echo "- test9 succeeded -" + +cd .. + +# Test 10: header option +mkdir test10 +cd test10 + +echo "> Run bedtools_merge on GFF file with header option" +"$meta_executable" \ + --input "../test_data/feature.gff" \ + --output "output.gff" \ + --header + +# checks +assert_file_exists "output.gff" +assert_file_not_empty "output.gff" +assert_identical_content "output.gff" "../test_data/expected_header.bed" +echo "- test10 succeeded -" + +cd .. echo "---- All tests succeeded! ----" exit 0 diff --git a/src/bedtools/bedtools_merge/test_data/expected_bam.bed b/src/bedtools/bedtools_merge/test_data/expected_bam.bed deleted file mode 100644 index 45ad932c..00000000 --- a/src/bedtools/bedtools_merge/test_data/expected_bam.bed +++ /dev/null @@ -1,2 +0,0 @@ -chr1 128 228 -chr1 428 528 diff --git a/src/bedtools/bedtools_merge/test_data/featureA.bed b/src/bedtools/bedtools_merge/test_data/featureA.bed deleted file mode 100644 index 6c266581..00000000 --- a/src/bedtools/bedtools_merge/test_data/featureA.bed +++ /dev/null @@ -1,3 +0,0 @@ -chr1 100 200 -chr1 150 250 -chr1 300 400 diff --git a/src/bedtools/bedtools_merge/test_data/featureB.bed b/src/bedtools/bedtools_merge/test_data/featureB.bed deleted file mode 100644 index 4f2e2980..00000000 --- a/src/bedtools/bedtools_merge/test_data/featureB.bed +++ /dev/null @@ -1,4 +0,0 @@ -chr1 100 200 a1 1 + -chr1 180 250 a2 2 + -chr1 250 500 a3 3 - -chr1 501 1000 a4 4 +