From c587302cbf5ef99590727948ce54e443b85bf2dd Mon Sep 17 00:00:00 2001 From: tgaspe Date: Wed, 7 Aug 2024 10:23:40 -0300 Subject: [PATCH 01/17] Initial commit --- .../bedtools_genomecov/config.vsh.yaml | 79 ++++++++++++++ src/bedtools/bedtools_genomecov/help.txt | 101 ++++++++++++++++++ src/bedtools/bedtools_genomecov/script.sh | 13 +++ src/bedtools/bedtools_genomecov/test.sh | 58 ++++++++++ 4 files changed, 251 insertions(+) create mode 100644 src/bedtools/bedtools_genomecov/config.vsh.yaml create mode 100644 src/bedtools/bedtools_genomecov/help.txt create mode 100644 src/bedtools/bedtools_genomecov/script.sh create mode 100644 src/bedtools/bedtools_genomecov/test.sh diff --git a/src/bedtools/bedtools_genomecov/config.vsh.yaml b/src/bedtools/bedtools_genomecov/config.vsh.yaml new file mode 100644 index 00000000..fb2b39fe --- /dev/null +++ b/src/bedtools/bedtools_genomecov/config.vsh.yaml @@ -0,0 +1,79 @@ +name: bedtools_genomecov +namespace: bedtools +description: | + Compute the coverage of a feature file among a genome. +keywords: [genome coverage, BED] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html + repository: https://github.com/arq5x/bedtools2 +references: + doi: 10.1093/bioinformatics/btq033 +license: GPL-2.0, MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + direction: input + description: | + The input file (BED/GFF/VCF) to be used. + required: true + example: input.bed + + - name: --genome + alternatives: -g + type: file + direction: input + description: | + The genome file to be used. + required: true + example: genome.txt + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + description: | + The output BED file. + required: true + example: output.bed + + - name: Options + arguments: + - name: --input_bam + alternatives: -ibam + type: boolean_true + description: | + The input file is in BAM format. + Note: BAM _must_ be sorted by position + + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/bedtools/bedtools_genomecov/help.txt b/src/bedtools/bedtools_genomecov/help.txt new file mode 100644 index 00000000..f13a71d3 --- /dev/null +++ b/src/bedtools/bedtools_genomecov/help.txt @@ -0,0 +1,101 @@ +```bash +bedtools genomecov +``` + +Tool: bedtools genomecov (aka genomeCoverageBed) +Version: v2.30.0 +Summary: Compute the coverage of a feature file among a genome. + +Usage: bedtools genomecov [OPTIONS] -i -g + +Options: + -ibam The input file is in BAM format. + Note: BAM _must_ be sorted by position + + -d Report the depth at each genome position (with one-based coordinates). + Default behavior is to report a histogram. + + -dz Report the depth at each genome position (with zero-based coordinates). + Reports only non-zero positions. + Default behavior is to report a histogram. + + -bg Report depth in BedGraph format. For details, see: + genome.ucsc.edu/goldenPath/help/bedgraph.html + + -bga Report depth in BedGraph format, as above (-bg). + However with this option, regions with zero + coverage are also reported. This allows one to + quickly extract all regions of a genome with 0 + coverage by applying: "grep -w 0$" to the output. + + -split Treat "split" BAM or BED12 entries as distinct BED intervals. + when computing coverage. + For BAM files, this uses the CIGAR "N" and "D" operations + to infer the blocks for computing coverage. + For BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds + fields (i.e., columns 10,11,12). + + -ignoreD Ignore local deletions (CIGAR "D" operations) in BAM entries + when computing coverage. + + -strand Calculate coverage of intervals from a specific strand. + With BED files, requires at least 6 columns (strand is column 6). + - (STRING): can be + or - + + -pc Calculate coverage of pair-end fragments. + Works for BAM files only + -fs Force to use provided fragment size instead of read length + Works for BAM files only + -du Change strand af the mate read (so both reads from the same strand) useful for strand specific + Works for BAM files only + -5 Calculate coverage of 5" positions (instead of entire interval). + + -3 Calculate coverage of 3" positions (instead of entire interval). + + -max Combine all positions with a depth >= max into + a single bin in the histogram. Irrelevant + for -d and -bedGraph + - (INTEGER) + + -scale Scale the coverage by a constant factor. + Each coverage value is multiplied by this factor before being reported. + Useful for normalizing coverage by, e.g., reads per million (RPM). + - Default is 1.0; i.e., unscaled. + - (FLOAT) + + -trackline Adds a UCSC/Genome-Browser track line definition in the first line of the output. + - See here for more details about track line definition: + http://genome.ucsc.edu/goldenPath/help/bedgraph.html + - NOTE: When adding a trackline definition, the output BedGraph can be easily + uploaded to the Genome Browser as a custom track, + BUT CAN NOT be converted into a BigWig file (w/o removing the first line). + + -trackopts Writes additional track line definition parameters in the first line. + - Example: + -trackopts 'name="My Track" visibility=2 color=255,30,30' + Note the use of single-quotes if you have spaces in your parameters. + - (TEXT) + +Notes: + (1) The genome file should tab delimited and structured as follows: + + + For example, Human (hg19): + chr1 249250621 + chr2 243199373 + ... + chr18_gl000207_random 4262 + + (2) The input BED (-i) file must be grouped by chromosome. + A simple "sort -k 1,1 > .sorted" will suffice. + + (3) The input BAM (-ibam) file must be sorted by position. + A "samtools sort " should suffice. + +Tips: + One can use the UCSC Genome Browser's MySQL database to extract + chromosome sizes. For example, H. sapiens: + + mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \ + "select chrom, size from hg19.chromInfo" > hg19.genome + diff --git a/src/bedtools/bedtools_genomecov/script.sh b/src/bedtools/bedtools_genomecov/script.sh new file mode 100644 index 00000000..6a9543d2 --- /dev/null +++ b/src/bedtools/bedtools_genomecov/script.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +[[ "$par_input_bam" == "false" ]] && unset par_input_bam + +bedtools genomecov \ + ${par_input_bam:+-ibam "$par_input_bam"} \ + -i "$par_input" \ + -g "$par_genome" \ + > "$par_output" + \ No newline at end of file diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh new file mode 100644 index 00000000..6d9e52ea --- /dev/null +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# exit on error +set -e + +## VIASH START +meta_executable="target/executable/bedtools/bedtools_intersect/bedtools_intersect" +meta_resources_dir="src/bedtools/bedtools_intersect" +## VIASH END + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." +mkdir -p test_data + +# create input files + +# create expected output files + + +# Test 1: +mkdir test1 +cd test1 + +echo "> Run bedtools_genomecov on BED file" +# "$meta_executable" \ +# --input_a "../test_data/featuresA.bed" \ +# --input_b "../test_data/featuresB.bed" \ +# --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_default.bed" +echo "- test1 succeeded -" + +cd .. + + + +echo "---- All tests succeeded! ----" +exit 0 From 6f53e13941e009394d0360307885194bbe3dd09f Mon Sep 17 00:00:00 2001 From: tgaspe Date: Wed, 7 Aug 2024 10:44:23 -0300 Subject: [PATCH 02/17] Update config.vsh.yaml --- .../bedtools_genomecov/config.vsh.yaml | 125 +++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/src/bedtools/bedtools_genomecov/config.vsh.yaml b/src/bedtools/bedtools_genomecov/config.vsh.yaml index fb2b39fe..40e9a97e 100644 --- a/src/bedtools/bedtools_genomecov/config.vsh.yaml +++ b/src/bedtools/bedtools_genomecov/config.vsh.yaml @@ -2,7 +2,7 @@ name: bedtools_genomecov namespace: bedtools description: | Compute the coverage of a feature file among a genome. -keywords: [genome coverage, BED] +keywords: [genome coverage, BED, GFF, VCF, BAM] links: documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html repository: https://github.com/arq5x/bedtools2 @@ -55,6 +55,129 @@ argument_groups: The input file is in BAM format. Note: BAM _must_ be sorted by position + - name: --depth + alternatives: -d + type: boolean_true + description: | + Report the depth at each genome position (with one-based coordinates). + Default behavior is to report a histogram. + + - name: --depth_zero + alternatives: -dz + type: boolean_true + description: | + Report the depth at each genome position (with zero-based coordinates). + Reports only non-zero positions. + Default behavior is to report a histogram. + + - name: --bed_graph + alternatives: -bg + type: boolean_true + description: | + Report depth in BedGraph format. For details, see: + genome.ucsc.edu/goldenPath/help/bedgraph.html + + - name: --bed_graph_zero_coverage + alternatives: -bga + type: boolean_true + description: | + Report depth in BedGraph format, as above (-bg). + However with this option, regions with zero + coverage are also reported. This allows one to + quickly extract all regions of a genome with 0 + coverage by applying: "grep -w 0$" to the output. + + - name: --split + type: boolean_true + description: | + Treat "split" BAM or BED12 entries as distinct BED intervals. + when computing coverage. + For BAM files, this uses the CIGAR "N" and "D" operations + to infer the blocks for computing coverage. + For BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds + fields (i.e., columns 10,11,12). + + - name: --ignore_deletion + alternatives: -ignoreD + type: boolean_true + description: | + Ignore local deletions (CIGAR "D" operations) in BAM entries + when computing coverage. + + - name: --strand + type: string + description: | + Calculate coverage of intervals from a specific strand. + With BED files, requires at least 6 columns (strand is column 6). + - (STRING): can be + or - + + - name: --pair_end_coverage + alternatives: -pc + type: boolean_true + description: | + Calculate coverage of pair-end fragments. + Works for BAM files only + + - name: --fragment_size + alternatives: -fs + type: boolean_true + description: | + Force to use provided fragment size instead of read length + Works for BAM files only + + - name: --du + type: boolean_true + description: | + Change strand af the mate read (so both reads from the same strand) useful for strand specific + Works for BAM files only + + - name: --5_prime + alternatives: -5 + type: boolean_true + description: | + Calculate coverage of 5" positions (instead of entire interval). + + - name: --3_prime + alternatives: -3 + type: boolean_true + description: | + Calculate coverage of 3" positions (instead of entire interval). + + - name: --max + type: integer + description: | + Combine all positions with a depth >= max into + a single bin in the histogram. Irrelevant + for -d and -bedGraph + - (INTEGER) + + - name: --scale + type: double + description: | + Scale the coverage by a constant factor. + Each coverage value is multiplied by this factor before being reported. + Useful for normalizing coverage by, e.g., reads per million (RPM). + - Default is 1.0; i.e., unscaled. + - (FLOAT) + + - name: --trackline + type: boolean_true + description: | + Adds a UCSC/Genome-Browser track line definition in the first line of the output. + - See here for more details about track line definition: + http://genome.ucsc.edu/goldenPath/help/bedgraph.html + - NOTE: When adding a trackline definition, the output BedGraph can be easily + uploaded to the Genome Browser as a custom track, + BUT CAN NOT be converted into a BigWig file (w/o removing the first line). + + - name: --trackopts + type: string + description: | + Writes additional track line definition parameters in the first line. + - Example: + -trackopts 'name="My Track" visibility=2 color=255,30,30' + Note the use of single-quotes if you have spaces in your parameters. + - (TEXT) resources: - type: bash_script From da6e11fb8045311e8614e8c7dfb4005b46669f78 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Wed, 7 Aug 2024 10:51:31 -0300 Subject: [PATCH 03/17] Update script.sh --- src/bedtools/bedtools_genomecov/script.sh | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/bedtools/bedtools_genomecov/script.sh b/src/bedtools/bedtools_genomecov/script.sh index 6a9543d2..5dac5946 100644 --- a/src/bedtools/bedtools_genomecov/script.sh +++ b/src/bedtools/bedtools_genomecov/script.sh @@ -4,9 +4,37 @@ ## VIASH END [[ "$par_input_bam" == "false" ]] && unset par_input_bam +[[ "$par_depth" == "false" ]] && unset par_depth +[[ "$par_depth_zero" == "false" ]] && unset par_depth_zero +[[ "$par_bed_graph" == "false" ]] && unset par_bed_graph +[[ "$par_bed_graph_zero_coverage" == "false" ]] && unset par_bed_graph_zero_coverage +[[ "$par_split" == "false" ]] && unset par_split +[[ "$par_ignore_deletion" == "false" ]] && unset par_ignore_deletion +[[ "$par_pair_end_coverage" == "false" ]] && unset par_pair_end_coverage +[[ "$par_fragment_size" == "false" ]] && unset par_fragment_size +[[ "$par_du" == "false" ]] && unset par_du +[[ "$par_5_prime" == "false" ]] && unset par_5_prime +[[ "$par_3_prime" == "false" ]] && unset par_3_prime +[[ "$par_trackline" == "false" ]] && unset par_trackline bedtools genomecov \ ${par_input_bam:+-ibam "$par_input_bam"} \ + ${par_depth:+-d} \ + ${par_depth_zero:+-dz} \ + ${par_bed_graph:+-bg} \ + ${par_bed_graph_zero_coverage:+-bga} \ + ${par_split:+-split} \ + ${par_ignore_deletion:+-ignoreD} \ + ${par_pair_end_coverage:+-pc} \ + ${par_fragment_size:+-fs} \ + ${par_du:+-du} \ + ${par_5_prime:+-5} \ + ${par_3_prime:+-3} \ + ${par_trackline:+-trackline} \ + ${par_strand:+-strand "$par_strand"} \ + ${par_max:+-max "$par_max"} \ + ${par_scale:+-scale "$par_scale"} \ + ${par_trackopts:+-trackopts "$par_trackopts"} \ -i "$par_input" \ -g "$par_genome" \ > "$par_output" From bac80a5572370d8239f9d4ceb2b2c7c9e05da028 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Wed, 7 Aug 2024 11:08:40 -0300 Subject: [PATCH 04/17] update on test.sh --- src/bedtools/bedtools_genomecov/test.sh | 25 +++++++++++++------ .../bedtools_genomecov/test_data/example.bed | 2 ++ .../bedtools_genomecov/test_data/genome.txt | 3 +++ 3 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 src/bedtools/bedtools_genomecov/test_data/example.bed create mode 100644 src/bedtools/bedtools_genomecov/test_data/genome.txt diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index 6d9e52ea..5305438f 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -29,9 +29,18 @@ assert_identical_content() { echo "Creating Test Data..." mkdir -p test_data -# create input files - -# create expected output files +# Create and populate input files +printf "chr1\t248956422\nchr3\t242193529\nchr2\t198295559\n" > "test_data/genome.txt" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t37\t+\nchr2:172936693-172938111\t428\t528\tmy_read/2\t37\t-\n" > "test_data/example.bed" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2:172936693-172938111\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "test_data/example.bed12" +# Create and populate example.gff file +printf "##gff-version 3\n" > "test_data/example.gff" +printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/example.gff" +printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/example.gff" +printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/example.gff" +printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/example.gff" +printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/example.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/example.gff" # Test 1: @@ -39,15 +48,15 @@ mkdir test1 cd test1 echo "> Run bedtools_genomecov on BED file" -# "$meta_executable" \ -# --input_a "../test_data/featuresA.bed" \ -# --input_b "../test_data/featuresB.bed" \ -# --output "output.bed" +"$meta_executable" \ + --input "../test_data/example.bed" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_default.bed" +#assert_identical_content "output.bed" "../test_data/expected_default.bed" echo "- test1 succeeded -" cd .. diff --git a/src/bedtools/bedtools_genomecov/test_data/example.bed b/src/bedtools/bedtools_genomecov/test_data/example.bed new file mode 100644 index 00000000..09df88eb --- /dev/null +++ b/src/bedtools/bedtools_genomecov/test_data/example.bed @@ -0,0 +1,2 @@ +chr2 128 228 my_read/1 37 + +chr2 428 528 my_read/2 37 - diff --git a/src/bedtools/bedtools_genomecov/test_data/genome.txt b/src/bedtools/bedtools_genomecov/test_data/genome.txt new file mode 100644 index 00000000..460c34fe --- /dev/null +++ b/src/bedtools/bedtools_genomecov/test_data/genome.txt @@ -0,0 +1,3 @@ +chr1 248956422 +chr2 198295559 +chr3 242193529 From 6a81a19c61faa0b412584671db0bee75dd367e24 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Thu, 8 Aug 2024 09:48:07 -0300 Subject: [PATCH 05/17] bug fixing --- .../bedtools_genomecov/config.vsh.yaml | 4 +- src/bedtools/bedtools_genomecov/script.sh | 8 ++-- src/bedtools/bedtools_genomecov/test.sh | 38 +++++++++++++------ 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/bedtools/bedtools_genomecov/config.vsh.yaml b/src/bedtools/bedtools_genomecov/config.vsh.yaml index 40e9a97e..452153b5 100644 --- a/src/bedtools/bedtools_genomecov/config.vsh.yaml +++ b/src/bedtools/bedtools_genomecov/config.vsh.yaml @@ -131,13 +131,13 @@ argument_groups: Change strand af the mate read (so both reads from the same strand) useful for strand specific Works for BAM files only - - name: --5_prime + - name: --five_prime alternatives: -5 type: boolean_true description: | Calculate coverage of 5" positions (instead of entire interval). - - name: --3_prime + - name: --three_prime alternatives: -3 type: boolean_true description: | diff --git a/src/bedtools/bedtools_genomecov/script.sh b/src/bedtools/bedtools_genomecov/script.sh index 5dac5946..06a120b7 100644 --- a/src/bedtools/bedtools_genomecov/script.sh +++ b/src/bedtools/bedtools_genomecov/script.sh @@ -13,8 +13,8 @@ [[ "$par_pair_end_coverage" == "false" ]] && unset par_pair_end_coverage [[ "$par_fragment_size" == "false" ]] && unset par_fragment_size [[ "$par_du" == "false" ]] && unset par_du -[[ "$par_5_prime" == "false" ]] && unset par_5_prime -[[ "$par_3_prime" == "false" ]] && unset par_3_prime +[[ "$par_five_prime" == "false" ]] && unset par_five_prime +[[ "$par_three_prime" == "false" ]] && unset par_three_prime [[ "$par_trackline" == "false" ]] && unset par_trackline bedtools genomecov \ @@ -28,8 +28,8 @@ bedtools genomecov \ ${par_pair_end_coverage:+-pc} \ ${par_fragment_size:+-fs} \ ${par_du:+-du} \ - ${par_5_prime:+-5} \ - ${par_3_prime:+-3} \ + ${par_five_prime:+-5} \ + ${par_three_prime:+-3} \ ${par_trackline:+-trackline} \ ${par_strand:+-strand "$par_strand"} \ ${par_max:+-max "$par_max"} \ diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index 5305438f..40cff0ff 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -30,18 +30,19 @@ echo "Creating Test Data..." mkdir -p test_data # Create and populate input files -printf "chr1\t248956422\nchr3\t242193529\nchr2\t198295559\n" > "test_data/genome.txt" -printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t37\t+\nchr2:172936693-172938111\t428\t528\tmy_read/2\t37\t-\n" > "test_data/example.bed" -printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2:172936693-172938111\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "test_data/example.bed12" -# Create and populate example.gff file -printf "##gff-version 3\n" > "test_data/example.gff" -printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/example.gff" -printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/example.gff" -printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/example.gff" -printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/example.gff" -printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/example.gff" -printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/example.gff" +printf "chr1\t248956422\nchr2\t198295559\nchr3\t242193529\n" > "test_data/genome.txt" +printf "chr2\t128\t228\tmy_read/1\t37\t+\nchr2\t428\t528\tmy_read/2\t37\t-\n" > "test_data/example.bed" +printf "chr2\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "test_data/example.bed12" +# expected output +cat > "test_data/expected_default.bed" < Run bedtools_genomecov on BED file" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -#assert_identical_content "output.bed" "../test_data/expected_default.bed" +assert_identical_content "output.bed" "../test_data/expected_default.bed" echo "- test1 succeeded -" cd .. +# Test 2: ibam option and pair end option and fragment size option + +# Test 3: depth option + +# Test 4: strand option + +# Test 5: 5' end option + +# Test 6: max option + +# Test 7: scale option + +# Test 8: trackopts option echo "---- All tests succeeded! ----" From 278ff8ab2ea93ef8ddbec57c41bce2d6bbc38ead Mon Sep 17 00:00:00 2001 From: tgaspe Date: Thu, 8 Aug 2024 10:26:39 -0300 Subject: [PATCH 06/17] adding ibam option tests --- .../bedtools_genomecov/config.vsh.yaml | 19 +++-- src/bedtools/bedtools_genomecov/script.sh | 10 +-- src/bedtools/bedtools_genomecov/test.sh | 78 +++++++++++++++++- .../bedtools_genomecov/test_data/example.bam | Bin 0 -> 334 bytes 4 files changed, 91 insertions(+), 16 deletions(-) create mode 100644 src/bedtools/bedtools_genomecov/test_data/example.bam diff --git a/src/bedtools/bedtools_genomecov/config.vsh.yaml b/src/bedtools/bedtools_genomecov/config.vsh.yaml index 452153b5..9035858c 100644 --- a/src/bedtools/bedtools_genomecov/config.vsh.yaml +++ b/src/bedtools/bedtools_genomecov/config.vsh.yaml @@ -24,8 +24,15 @@ argument_groups: direction: input description: | The input file (BED/GFF/VCF) to be used. - required: true example: input.bed + + - name: --input_bam + alternatives: -ibam + type: file + description: | + The input file is in BAM format. + Note: BAM _must_ be sorted by positions. + --genome option is ignored! - name: --genome alternatives: -g @@ -33,7 +40,6 @@ argument_groups: direction: input description: | The genome file to be used. - required: true example: genome.txt - name: Outputs @@ -48,13 +54,7 @@ argument_groups: - name: Options arguments: - - name: --input_bam - alternatives: -ibam - type: boolean_true - description: | - The input file is in BAM format. - Note: BAM _must_ be sorted by position - + - name: --depth alternatives: -d type: boolean_true @@ -186,6 +186,7 @@ resources: test_resources: - type: bash_script path: test.sh + - path: test_data engines: - type: docker diff --git a/src/bedtools/bedtools_genomecov/script.sh b/src/bedtools/bedtools_genomecov/script.sh index 06a120b7..39c4d0db 100644 --- a/src/bedtools/bedtools_genomecov/script.sh +++ b/src/bedtools/bedtools_genomecov/script.sh @@ -18,15 +18,12 @@ [[ "$par_trackline" == "false" ]] && unset par_trackline bedtools genomecov \ - ${par_input_bam:+-ibam "$par_input_bam"} \ ${par_depth:+-d} \ ${par_depth_zero:+-dz} \ ${par_bed_graph:+-bg} \ ${par_bed_graph_zero_coverage:+-bga} \ ${par_split:+-split} \ ${par_ignore_deletion:+-ignoreD} \ - ${par_pair_end_coverage:+-pc} \ - ${par_fragment_size:+-fs} \ ${par_du:+-du} \ ${par_five_prime:+-5} \ ${par_three_prime:+-3} \ @@ -35,7 +32,10 @@ bedtools genomecov \ ${par_max:+-max "$par_max"} \ ${par_scale:+-scale "$par_scale"} \ ${par_trackopts:+-trackopts "$par_trackopts"} \ - -i "$par_input" \ - -g "$par_genome" \ + ${par_input_bam:+-ibam "$par_input_bam"} \ + ${par_input:+-i "$par_input"} \ + ${par_genome:+-g "$par_genome"} \ + ${par_pair_end_coverage:+-pc} \ + ${par_fragment_size:+-fs} \ > "$par_output" \ No newline at end of file diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index 40cff0ff..57510982 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -8,6 +8,9 @@ meta_executable="target/executable/bedtools/bedtools_intersect/bedtools_intersec meta_resources_dir="src/bedtools/bedtools_intersect" ## VIASH END +# directory of the bam file +test_data="$meta_resources_dir/test_data" + ############################################# # helper functions assert_file_exists() { @@ -34,7 +37,7 @@ printf "chr1\t248956422\nchr2\t198295559\nchr3\t242193529\n" > "test_data/genome printf "chr2\t128\t228\tmy_read/1\t37\t+\nchr2\t428\t528\tmy_read/2\t37\t-\n" > "test_data/example.bed" printf "chr2\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "test_data/example.bed12" -# expected output +# expected outputs cat > "test_data/expected_default.bed" < "test_data/expected_ibam.bed" < "test_data/expected_ibam_pc.bed" < "test_data/expected_ibam_fs.bed" < Run bedtools_genomecov on BAM file with -ibam" +"$meta_executable" \ + --input_bam "$test_data/example.bam" \ + --output "output.bed" \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_ibam.bed" +echo "- test2 succeeded -" + +cd .. # Test 3: depth option @@ -76,6 +112,44 @@ cd .. # Test 8: trackopts option +# Test 9: bedgraph option + +# Test 10: ibam pc options +mkdir test10 +cd test10 + +echo "> Run bedtools_genomecov on BAM file with -ibam, -pc" +"$meta_executable" \ + --input_bam "$test_data/example.bam" \ + --output "output.bed" \ + --fragment_size \ + --pair_end_coverage \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_ibam_pc.bed" +echo "- test10 succeeded -" + +cd .. + +# Test 11: ibam fs options +mkdir test11 +cd test11 + +echo "> Run bedtools_genomecov on BAM file with -ibam, -fs" +"$meta_executable" \ + --input_bam "$test_data/example.bam" \ + --output "output.bed" \ + --fragment_size \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_ibam_fs.bed" +echo "- test11 succeeded -" + +cd .. echo "---- All tests succeeded! ----" exit 0 diff --git a/src/bedtools/bedtools_genomecov/test_data/example.bam b/src/bedtools/bedtools_genomecov/test_data/example.bam new file mode 100644 index 0000000000000000000000000000000000000000..ffc075ab83a83a98ed1edbf88b26cc27ad8946c6 GIT binary patch literal 334 zcmb2|=3rp}f&Xj_PR>jWAq>SuUsA6mBqS7Y@IB%Aw%O~PhS4S?6Z1_bX2zRMuCZ>` z;o;@Ato^fw$CpQUheTtRYNNz-r#8JXHa3Ry>s4lk0?m>~GxQF_-U<7&m>dP#pU;|5 z)~CHK)-&PMX8(zQnRkjz7tzu&Q_9lpm^;_nXXDZb**~)OH9hZA+GbYw!F2!1eU^u& z=6?J8db>^n+vnS58VqGOpQde!^LhT^FPno$sK1R;RVb&i_o5|>-LG;Kg??MHCx&;~ ziZww?R#r16X1LX_ZFYQZ=WBLl9Y-y@V*W$>-;Wo3eOwoN_@m-GsXhDI Date: Thu, 8 Aug 2024 10:39:37 -0300 Subject: [PATCH 07/17] depthzero and strand option tests --- src/bedtools/bedtools_genomecov/test.sh | 48 +++++++++++++++++++ .../test_data/example_dz.bed | 1 + 2 files changed, 49 insertions(+) create mode 100644 src/bedtools/bedtools_genomecov/test_data/example_dz.bed diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index 57510982..1f62db8d 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -36,6 +36,7 @@ mkdir -p test_data printf "chr1\t248956422\nchr2\t198295559\nchr3\t242193529\n" > "test_data/genome.txt" printf "chr2\t128\t228\tmy_read/1\t37\t+\nchr2\t428\t528\tmy_read/2\t37\t-\n" > "test_data/example.bed" printf "chr2\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "test_data/example.bed12" +printf "chr2\t100\t103\n" > "test_data/example_dz.bed" # expected outputs cat > "test_data/expected_default.bed" < "test_data/expected_dz.bed" < "test_data/expected_strand.bed" < Run bedtools_genomecov on BED file with -dz" +"$meta_executable" \ + --input "../test_data/example_dz.bed" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" \ + --depth_zero + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_dz.bed" +echo "- test3 succeeded -" + +cd .. # Test 4: strand option +mkdir test4 +cd test4 + +echo "> Run bedtools_genomecov on BED file with -strand" +"$meta_executable" \ + --input "../test_data/example.bed" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" \ + --strand "-" \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_strand.bed" +echo "- test4 succeeded -" + +cd .. # Test 5: 5' end option diff --git a/src/bedtools/bedtools_genomecov/test_data/example_dz.bed b/src/bedtools/bedtools_genomecov/test_data/example_dz.bed new file mode 100644 index 00000000..a5b1a250 --- /dev/null +++ b/src/bedtools/bedtools_genomecov/test_data/example_dz.bed @@ -0,0 +1 @@ +chr2 100 103 From f264e22abd0307152425e738c04e4cb194548bc9 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Thu, 8 Aug 2024 10:48:28 -0300 Subject: [PATCH 08/17] 5prime and max tests --- src/bedtools/bedtools_genomecov/test.sh | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index 1f62db8d..a5bae97c 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -78,6 +78,15 @@ chr3 0 242193529 242193529 1 genome 0 689445410 689445510 1 genome 1 100 689445510 1.45044e-07 EOF +cat > "test_data/expected_5.bed" < Run bedtools_genomecov on BED file with -5" +"$meta_executable" \ + --input "../test_data/example.bed" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" \ + --five_prime \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_5.bed" +echo "- test5 succeeded -" + +cd .. # Test 6: max option +mkdir test6 +cd test6 + +echo "> Run bedtools_genomecov on BED file with -max" +"$meta_executable" \ + --input "../test_data/example.bed" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" \ + --max 100 \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_default.bed" +echo "- test6 succeeded -" + +cd .. # Test 7: scale option +# mkdir test7 +# cd test7 + +# echo "> Run bedtools_genomecov on BED file with bedgraph and scale" +# "$meta_executable" \ +# --input "../test_data/example.bed" \ +# --genome "../test_data/genome.txt" \ +# --output "output.bed" \ +# --scale 100 \ + +# # checks +# assert_file_exists "output.bed" +# assert_file_not_empty "output.bed" +# assert_identical_content "output.bed" "../test_data/expected_default.bed" +# echo "- test7 succeeded -" + +# cd .. # Test 8: trackopts option From 7a27fe5efc634db272abc195e4a29ab3f0e99cc8 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Thu, 8 Aug 2024 11:23:18 -0300 Subject: [PATCH 09/17] more tests --- src/bedtools/bedtools_genomecov/test.sh | 125 ++++++++++++++---- .../test_data/example.bed12 | 2 + 2 files changed, 103 insertions(+), 24 deletions(-) create mode 100644 src/bedtools/bedtools_genomecov/test_data/example.bed12 diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index a5bae97c..cf262fc7 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -86,7 +86,29 @@ chr3 0 242193529 242193529 1 genome 0 689445508 689445510 1 genome 1 2 689445510 2.90088e-09 EOF - +cat > "test_data/expected_bg_scale.bed" < "test_data/expected_trackopts.bed" < "test_data/expected_split.bed" < "test_data/expected_ignoreD_du.bed" < Run bedtools_genomecov on BED file with bedgraph and scale" -# "$meta_executable" \ -# --input "../test_data/example.bed" \ -# --genome "../test_data/genome.txt" \ -# --output "output.bed" \ -# --scale 100 \ +echo "> Run bedtools_genomecov on BED file with -bg and -scale" +"$meta_executable" \ + --input "../test_data/example.bed" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" \ + --bed_graph \ + --scale 100 \ -# # checks -# assert_file_exists "output.bed" -# assert_file_not_empty "output.bed" -# assert_identical_content "output.bed" "../test_data/expected_default.bed" -# echo "- test7 succeeded -" +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_bg_scale.bed" +echo "- test7 succeeded -" -# cd .. +cd .. # Test 8: trackopts option +mkdir test8 +cd test8 -# Test 9: bedgraph option +echo "> Run bedtools_genomecov on BED file with -bg and -trackopts" +"$meta_executable" \ + --input "../test_data/example.bed" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" \ + --bed_graph \ + --trackopts "name=example" \ -# Test 10: ibam pc options -mkdir test10 -cd test10 +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_trackopts.bed" +echo "- test8 succeeded -" + +cd .. + +# Test 9: ibam pc options +mkdir test9 +cd test9 echo "> Run bedtools_genomecov on BAM file with -ibam, -pc" "$meta_executable" \ @@ -237,13 +276,13 @@ echo "> Run bedtools_genomecov on BAM file with -ibam, -pc" assert_file_exists "output.bed" assert_file_not_empty "output.bed" assert_identical_content "output.bed" "../test_data/expected_ibam_pc.bed" -echo "- test10 succeeded -" +echo "- test9 succeeded -" cd .. -# Test 11: ibam fs options -mkdir test11 -cd test11 +# Test 10: ibam fs options +mkdir test10 +cd test10 echo "> Run bedtools_genomecov on BAM file with -ibam, -fs" "$meta_executable" \ @@ -255,9 +294,47 @@ echo "> Run bedtools_genomecov on BAM file with -ibam, -fs" assert_file_exists "output.bed" assert_file_not_empty "output.bed" assert_identical_content "output.bed" "../test_data/expected_ibam_fs.bed" +echo "- test10 succeeded -" + +cd .. + +# Test 11: split +mkdir test11 +cd test11 + +echo "> Run bedtools_genomecov on BED12 file with -split" +"$meta_executable" \ + --input "../test_data/example.bed12" \ + --genome "../test_data/genome.txt" \ + --output "output.bed" \ + --split \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_split.bed" echo "- test11 succeeded -" cd .. +# Test 12: ignore deletion and du +mkdir test12 +cd test12 + +echo "> Run bedtools_genomecov on BAM file with -ignoreD and -du" +"$meta_executable" \ + --input_bam "$test_data/example.bam" \ + --output "output.bed" \ + --ignore_deletion \ + --du \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_ignoreD_du.bed" +echo "- test12 succeeded -" + +cd .. + echo "---- All tests succeeded! ----" exit 0 diff --git a/src/bedtools/bedtools_genomecov/test_data/example.bed12 b/src/bedtools/bedtools_genomecov/test_data/example.bed12 new file mode 100644 index 00000000..58c7a75d --- /dev/null +++ b/src/bedtools/bedtools_genomecov/test_data/example.bed12 @@ -0,0 +1,2 @@ +chr2 128 228 my_read/1 60 + 128 228 255,0,0 1 100 0 +chr2 428 528 my_read/2 60 - 428 528 255,0,0 1 100 0 From d3b46757182943d25c0aeef6f4a2d49d458978f4 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Thu, 8 Aug 2024 11:25:24 -0300 Subject: [PATCH 10/17] Changelog --- CHANGELOG.md | 2 +- src/bedtools/bedtools_genomecov/test_data/example.bed | 2 -- src/bedtools/bedtools_genomecov/test_data/example.bed12 | 2 -- src/bedtools/bedtools_genomecov/test_data/example_dz.bed | 1 - src/bedtools/bedtools_genomecov/test_data/genome.txt | 3 --- 5 files changed, 1 insertion(+), 9 deletions(-) delete mode 100644 src/bedtools/bedtools_genomecov/test_data/example.bed delete mode 100644 src/bedtools/bedtools_genomecov/test_data/example.bed12 delete mode 100644 src/bedtools/bedtools_genomecov/test_data/example_dz.bed delete mode 100644 src/bedtools/bedtools_genomecov/test_data/genome.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dd2389c..73a03dc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - + - `bedtools/bedtools_genomecov`: Compute the coverage of a feature file (bed/gff/vcf/bam) among a genome (PR #128). ## MINOR CHANGES diff --git a/src/bedtools/bedtools_genomecov/test_data/example.bed b/src/bedtools/bedtools_genomecov/test_data/example.bed deleted file mode 100644 index 09df88eb..00000000 --- a/src/bedtools/bedtools_genomecov/test_data/example.bed +++ /dev/null @@ -1,2 +0,0 @@ -chr2 128 228 my_read/1 37 + -chr2 428 528 my_read/2 37 - diff --git a/src/bedtools/bedtools_genomecov/test_data/example.bed12 b/src/bedtools/bedtools_genomecov/test_data/example.bed12 deleted file mode 100644 index 58c7a75d..00000000 --- a/src/bedtools/bedtools_genomecov/test_data/example.bed12 +++ /dev/null @@ -1,2 +0,0 @@ -chr2 128 228 my_read/1 60 + 128 228 255,0,0 1 100 0 -chr2 428 528 my_read/2 60 - 428 528 255,0,0 1 100 0 diff --git a/src/bedtools/bedtools_genomecov/test_data/example_dz.bed b/src/bedtools/bedtools_genomecov/test_data/example_dz.bed deleted file mode 100644 index a5b1a250..00000000 --- a/src/bedtools/bedtools_genomecov/test_data/example_dz.bed +++ /dev/null @@ -1 +0,0 @@ -chr2 100 103 diff --git a/src/bedtools/bedtools_genomecov/test_data/genome.txt b/src/bedtools/bedtools_genomecov/test_data/genome.txt deleted file mode 100644 index 460c34fe..00000000 --- a/src/bedtools/bedtools_genomecov/test_data/genome.txt +++ /dev/null @@ -1,3 +0,0 @@ -chr1 248956422 -chr2 198295559 -chr3 242193529 From f979eacdc80077d71228a0e8c94ceeb817abc06a Mon Sep 17 00:00:00 2001 From: tgaspe Date: Mon, 12 Aug 2024 00:19:25 +0200 Subject: [PATCH 11/17] Update config.vsh.yaml --- src/bedtools/bedtools_genomecov/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bedtools/bedtools_genomecov/config.vsh.yaml b/src/bedtools/bedtools_genomecov/config.vsh.yaml index 9035858c..2646735d 100644 --- a/src/bedtools/bedtools_genomecov/config.vsh.yaml +++ b/src/bedtools/bedtools_genomecov/config.vsh.yaml @@ -8,7 +8,7 @@ links: repository: https://github.com/arq5x/bedtools2 references: doi: 10.1093/bioinformatics/btq033 -license: GPL-2.0, MIT +license: MIT requirements: commands: [bedtools] authors: From d5ff055f5d66714a17428cad6aba53b63423cfac Mon Sep 17 00:00:00 2001 From: tgaspe Date: Mon, 12 Aug 2024 21:18:48 +0200 Subject: [PATCH 12/17] Update config.vsh.yaml --- src/bedtools/bedtools_genomecov/config.vsh.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/bedtools/bedtools_genomecov/config.vsh.yaml b/src/bedtools/bedtools_genomecov/config.vsh.yaml index 2646735d..0dddd7c7 100644 --- a/src/bedtools/bedtools_genomecov/config.vsh.yaml +++ b/src/bedtools/bedtools_genomecov/config.vsh.yaml @@ -4,8 +4,10 @@ description: | Compute the coverage of a feature file among a genome. keywords: [genome coverage, BED, GFF, VCF, BAM] links: + homepage: https://bedtools.readthedocs.io/en/latest/# documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html repository: https://github.com/arq5x/bedtools2 + issue_tracker: https://github.com/arq5x/bedtools2/issues references: doi: 10.1093/bioinformatics/btq033 license: MIT @@ -32,7 +34,7 @@ argument_groups: description: | The input file is in BAM format. Note: BAM _must_ be sorted by positions. - --genome option is ignored! + '--genome' option is ignored if you use '--input_bam' option! - name: --genome alternatives: -g @@ -106,10 +108,10 @@ argument_groups: - name: --strand type: string + choices: ["+", "-"] description: | Calculate coverage of intervals from a specific strand. With BED files, requires at least 6 columns (strand is column 6). - - (STRING): can be + or - - name: --pair_end_coverage alternatives: -pc @@ -145,6 +147,7 @@ argument_groups: - name: --max type: integer + min: 0 description: | Combine all positions with a depth >= max into a single bin in the histogram. Irrelevant @@ -153,6 +156,7 @@ argument_groups: - name: --scale type: double + min: 0 description: | Scale the coverage by a constant factor. Each coverage value is multiplied by this factor before being reported. From 007c50eec6427c257669fb6b8805639888140b9a Mon Sep 17 00:00:00 2001 From: tgaspe Date: Mon, 12 Aug 2024 21:22:50 +0200 Subject: [PATCH 13/17] Update script.sh --- src/bedtools/bedtools_genomecov/script.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/bedtools/bedtools_genomecov/script.sh b/src/bedtools/bedtools_genomecov/script.sh index 39c4d0db..88d3b68f 100644 --- a/src/bedtools/bedtools_genomecov/script.sh +++ b/src/bedtools/bedtools_genomecov/script.sh @@ -3,6 +3,9 @@ ## VIASH START ## VIASH END +# Exit on error +set -eo pipefail + [[ "$par_input_bam" == "false" ]] && unset par_input_bam [[ "$par_depth" == "false" ]] && unset par_depth [[ "$par_depth_zero" == "false" ]] && unset par_depth_zero From 77446e2ac80e805293bf0dee76f805879513e40a Mon Sep 17 00:00:00 2001 From: tgaspe Date: Mon, 12 Aug 2024 21:24:18 +0200 Subject: [PATCH 14/17] Update test.sh --- src/bedtools/bedtools_genomecov/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index cf262fc7..b2253600 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -1,7 +1,7 @@ #!/bin/bash # exit on error -set -e +set -eo pipefail ## VIASH START meta_executable="target/executable/bedtools/bedtools_intersect/bedtools_intersect" From 516e0183314956a0e36dd773a827e8055280def3 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Tue, 13 Aug 2024 11:29:27 +0200 Subject: [PATCH 15/17] TMPDIR --- src/bedtools/bedtools_genomecov/test.sh | 152 +++++++++++------------- 1 file changed, 72 insertions(+), 80 deletions(-) diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index b2253600..89d53061 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -30,16 +30,20 @@ assert_identical_content() { # Create directories for tests echo "Creating Test Data..." -mkdir -p test_data +TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT # Create and populate input files -printf "chr1\t248956422\nchr2\t198295559\nchr3\t242193529\n" > "test_data/genome.txt" -printf "chr2\t128\t228\tmy_read/1\t37\t+\nchr2\t428\t528\tmy_read/2\t37\t-\n" > "test_data/example.bed" -printf "chr2\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "test_data/example.bed12" -printf "chr2\t100\t103\n" > "test_data/example_dz.bed" +printf "chr1\t248956422\nchr2\t198295559\nchr3\t242193529\n" > "$TMPDIR/genome.txt" +printf "chr2\t128\t228\tmy_read/1\t37\t+\nchr2\t428\t528\tmy_read/2\t37\t-\n" > "$TMPDIR/example.bed" +printf "chr2\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "$TMPDIR/example.bed12" +printf "chr2\t100\t103\n" > "$TMPDIR/example_dz.bed" # expected outputs -cat > "test_data/expected_default.bed" < "$TMPDIR/expected_default.bed" < "test_data/expected_ibam.bed" < "$TMPDIR/expected_ibam.bed" < "test_data/expected_ibam_pc.bed" < "$TMPDIR/expected_ibam_pc.bed" < "test_data/expected_ibam_fs.bed" < "$TMPDIR/expected_ibam_fs.bed" < "test_data/expected_dz.bed" < "$TMPDIR/expected_dz.bed" < "test_data/expected_strand.bed" < "$TMPDIR/expected_strand.bed" < "test_data/expected_5.bed" < "$TMPDIR/expected_5.bed" < "test_data/expected_bg_scale.bed" < "$TMPDIR/expected_bg_scale.bed" < "test_data/expected_trackopts.bed" < "$TMPDIR/expected_trackopts.bed" < "test_data/expected_split.bed" < "$TMPDIR/expected_split.bed" < "test_data/expected_ignoreD_du.bed" < "$TMPDIR/expected_ignoreD_du.bed" < /dev/null echo "> Run bedtools_genomecov on BED file" "$meta_executable" \ - --input "../test_data/example.bed" \ - --genome "../test_data/genome.txt" \ + --input "../example.bed" \ + --genome "../genome.txt" \ --output "output.bed" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_default.bed" +assert_identical_content "output.bed" "../expected_default.bed" echo "- test1 succeeded -" -cd .. +popd > /dev/null # Test 2: ibam option -mkdir test2 -cd test2 +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null echo "> Run bedtools_genomecov on BAM file with -ibam" "$meta_executable" \ @@ -140,95 +142,90 @@ echo "> Run bedtools_genomecov on BAM file with -ibam" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_ibam.bed" +assert_identical_content "output.bed" "../expected_ibam.bed" echo "- test2 succeeded -" -cd .. +popd > /dev/null # Test 3: depth option -mkdir test3 -cd test3 +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null echo "> Run bedtools_genomecov on BED file with -dz" "$meta_executable" \ - --input "../test_data/example_dz.bed" \ - --genome "../test_data/genome.txt" \ + --input "../example_dz.bed" \ + --genome "../genome.txt" \ --output "output.bed" \ --depth_zero # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_dz.bed" +assert_identical_content "output.bed" "../expected_dz.bed" echo "- test3 succeeded -" -cd .. +popd > /dev/null # Test 4: strand option -mkdir test4 -cd test4 +mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null echo "> Run bedtools_genomecov on BED file with -strand" "$meta_executable" \ - --input "../test_data/example.bed" \ - --genome "../test_data/genome.txt" \ + --input "../example.bed" \ + --genome "../genome.txt" \ --output "output.bed" \ --strand "-" \ # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_strand.bed" +assert_identical_content "output.bed" "../expected_strand.bed" echo "- test4 succeeded -" -cd .. +popd > /dev/null # Test 5: 5' end option -mkdir test5 -cd test5 +mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null echo "> Run bedtools_genomecov on BED file with -5" "$meta_executable" \ - --input "../test_data/example.bed" \ - --genome "../test_data/genome.txt" \ + --input "../example.bed" \ + --genome "../genome.txt" \ --output "output.bed" \ --five_prime \ # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_5.bed" +assert_identical_content "output.bed" "../expected_5.bed" echo "- test5 succeeded -" -cd .. +popd > /dev/null # Test 6: max option -mkdir test6 -cd test6 +mkdir "$TMPDIR/test6" && pushd "$TMPDIR/test6" > /dev/null echo "> Run bedtools_genomecov on BED file with -max" "$meta_executable" \ - --input "../test_data/example.bed" \ - --genome "../test_data/genome.txt" \ + --input "../example.bed" \ + --genome "../genome.txt" \ --output "output.bed" \ --max 100 \ # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_default.bed" +assert_identical_content "output.bed" "../expected_default.bed" echo "- test6 succeeded -" -cd .. +popd > /dev/null # Test 7: bedgraph and scale option -mkdir test7 -cd test7 +mkdir "$TMPDIR/test7" && pushd "$TMPDIR/test7" > /dev/null echo "> Run bedtools_genomecov on BED file with -bg and -scale" "$meta_executable" \ - --input "../test_data/example.bed" \ - --genome "../test_data/genome.txt" \ + --input "../example.bed" \ + --genome "../genome.txt" \ --output "output.bed" \ --bed_graph \ --scale 100 \ @@ -236,19 +233,18 @@ echo "> Run bedtools_genomecov on BED file with -bg and -scale" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_bg_scale.bed" +assert_identical_content "output.bed" "../expected_bg_scale.bed" echo "- test7 succeeded -" -cd .. +popd > /dev/null # Test 8: trackopts option -mkdir test8 -cd test8 +mkdir "$TMPDIR/test8" && pushd "$TMPDIR/test8" > /dev/null echo "> Run bedtools_genomecov on BED file with -bg and -trackopts" "$meta_executable" \ - --input "../test_data/example.bed" \ - --genome "../test_data/genome.txt" \ + --input "../example.bed" \ + --genome "../genome.txt" \ --output "output.bed" \ --bed_graph \ --trackopts "name=example" \ @@ -256,14 +252,13 @@ echo "> Run bedtools_genomecov on BED file with -bg and -trackopts" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_trackopts.bed" +assert_identical_content "output.bed" "../expected_trackopts.bed" echo "- test8 succeeded -" -cd .. +popd > /dev/null # Test 9: ibam pc options -mkdir test9 -cd test9 +mkdir "$TMPDIR/test9" && pushd "$TMPDIR/test9" > /dev/null echo "> Run bedtools_genomecov on BAM file with -ibam, -pc" "$meta_executable" \ @@ -275,14 +270,13 @@ echo "> Run bedtools_genomecov on BAM file with -ibam, -pc" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_ibam_pc.bed" +assert_identical_content "output.bed" "../expected_ibam_pc.bed" echo "- test9 succeeded -" -cd .. +popd > /dev/null # Test 10: ibam fs options -mkdir test10 -cd test10 +mkdir "$TMPDIR/test10" && pushd "$TMPDIR/test10" > /dev/null echo "> Run bedtools_genomecov on BAM file with -ibam, -fs" "$meta_executable" \ @@ -293,33 +287,31 @@ echo "> Run bedtools_genomecov on BAM file with -ibam, -fs" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_ibam_fs.bed" +assert_identical_content "output.bed" "../expected_ibam_fs.bed" echo "- test10 succeeded -" -cd .. +popd > /dev/null # Test 11: split -mkdir test11 -cd test11 +mkdir "$TMPDIR/test11" && pushd "$TMPDIR/test11" > /dev/null echo "> Run bedtools_genomecov on BED12 file with -split" "$meta_executable" \ - --input "../test_data/example.bed12" \ - --genome "../test_data/genome.txt" \ + --input "../example.bed12" \ + --genome "../genome.txt" \ --output "output.bed" \ --split \ # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_split.bed" +assert_identical_content "output.bed" "../expected_split.bed" echo "- test11 succeeded -" -cd .. +popd > /dev/null # Test 12: ignore deletion and du -mkdir test12 -cd test12 +mkdir "$TMPDIR/test12" && pushd "$TMPDIR/test12" > /dev/null echo "> Run bedtools_genomecov on BAM file with -ignoreD and -du" "$meta_executable" \ @@ -331,10 +323,10 @@ echo "> Run bedtools_genomecov on BAM file with -ignoreD and -du" # checks assert_file_exists "output.bed" assert_file_not_empty "output.bed" -assert_identical_content "output.bed" "../test_data/expected_ignoreD_du.bed" +assert_identical_content "output.bed" "../expected_ignoreD_du.bed" echo "- test12 succeeded -" -cd .. +popd > /dev/null echo "---- All tests succeeded! ----" exit 0 From 3b8852763bc9c5975fe9cc8c5363b5767efcb524 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Tue, 20 Aug 2024 19:27:58 +0200 Subject: [PATCH 16/17] Unset Variables --- src/bedtools/bedtools_genomecov/script.sh | 34 ++++++++++++++--------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/bedtools/bedtools_genomecov/script.sh b/src/bedtools/bedtools_genomecov/script.sh index 88d3b68f..28e005fe 100644 --- a/src/bedtools/bedtools_genomecov/script.sh +++ b/src/bedtools/bedtools_genomecov/script.sh @@ -6,19 +6,27 @@ # Exit on error set -eo pipefail -[[ "$par_input_bam" == "false" ]] && unset par_input_bam -[[ "$par_depth" == "false" ]] && unset par_depth -[[ "$par_depth_zero" == "false" ]] && unset par_depth_zero -[[ "$par_bed_graph" == "false" ]] && unset par_bed_graph -[[ "$par_bed_graph_zero_coverage" == "false" ]] && unset par_bed_graph_zero_coverage -[[ "$par_split" == "false" ]] && unset par_split -[[ "$par_ignore_deletion" == "false" ]] && unset par_ignore_deletion -[[ "$par_pair_end_coverage" == "false" ]] && unset par_pair_end_coverage -[[ "$par_fragment_size" == "false" ]] && unset par_fragment_size -[[ "$par_du" == "false" ]] && unset par_du -[[ "$par_five_prime" == "false" ]] && unset par_five_prime -[[ "$par_three_prime" == "false" ]] && unset par_three_prime -[[ "$par_trackline" == "false" ]] && unset par_trackline +# Unset variables +unset_if_false=( + par_input_bam + par_depth + par_depth_zero + par_bed_graph + par_bed_graph_zero_coverage + par_split + par_ignore_deletion + par_pair_end_coverage + par_fragment_size + par_du + par_five_prime + par_three_prime + par_trackline +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done bedtools genomecov \ ${par_depth:+-d} \ From 1f5adfc3846fb03a7f94e30a2791470e0cc00575 Mon Sep 17 00:00:00 2001 From: tgaspe Date: Tue, 20 Aug 2024 19:57:30 +0200 Subject: [PATCH 17/17] par_trackopts multiple: true --- src/bedtools/bedtools_genomecov/config.vsh.yaml | 1 + src/bedtools/bedtools_genomecov/script.sh | 5 ++++- src/bedtools/bedtools_genomecov/test.sh | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/bedtools/bedtools_genomecov/config.vsh.yaml b/src/bedtools/bedtools_genomecov/config.vsh.yaml index 0dddd7c7..775587de 100644 --- a/src/bedtools/bedtools_genomecov/config.vsh.yaml +++ b/src/bedtools/bedtools_genomecov/config.vsh.yaml @@ -182,6 +182,7 @@ argument_groups: -trackopts 'name="My Track" visibility=2 color=255,30,30' Note the use of single-quotes if you have spaces in your parameters. - (TEXT) + multiple: true resources: - type: bash_script diff --git a/src/bedtools/bedtools_genomecov/script.sh b/src/bedtools/bedtools_genomecov/script.sh index 28e005fe..20fbd968 100644 --- a/src/bedtools/bedtools_genomecov/script.sh +++ b/src/bedtools/bedtools_genomecov/script.sh @@ -28,6 +28,9 @@ for par in ${unset_if_false[@]}; do [[ "$test_val" == "false" ]] && unset $par done +# Create input array +IFS=";" read -ra trackopts <<< $par_trackopts + bedtools genomecov \ ${par_depth:+-d} \ ${par_depth_zero:+-dz} \ @@ -42,7 +45,7 @@ bedtools genomecov \ ${par_strand:+-strand "$par_strand"} \ ${par_max:+-max "$par_max"} \ ${par_scale:+-scale "$par_scale"} \ - ${par_trackopts:+-trackopts "$par_trackopts"} \ + ${par_trackopts:+-trackopts "${trackopts[*]}"} \ ${par_input_bam:+-ibam "$par_input_bam"} \ ${par_input:+-i "$par_input"} \ ${par_genome:+-g "$par_genome"} \ diff --git a/src/bedtools/bedtools_genomecov/test.sh b/src/bedtools/bedtools_genomecov/test.sh index 89d53061..7e4487da 100644 --- a/src/bedtools/bedtools_genomecov/test.sh +++ b/src/bedtools/bedtools_genomecov/test.sh @@ -95,7 +95,7 @@ chr2 128 228 100 chr2 428 528 100 EOF cat > "$TMPDIR/expected_trackopts.bed" < Run bedtools_genomecov on BED file with -bg and -trackopts" --output "output.bed" \ --bed_graph \ --trackopts "name=example" \ + --trackopts "llama=Alpaco" \ # checks assert_file_exists "output.bed"