From 7269ae4e7b4d4aa2b8e1631a216a5531eb7165b6 Mon Sep 17 00:00:00 2001
From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com>
Date: Mon, 2 Sep 2024 14:42:44 +0200
Subject: [PATCH] Bedtools links (#137)

* Initial Commit

* Tests

* Adding help file

* Adding more description

* Update test.sh

* Update help.txt

* Update CHANGELOG.md
---
Review notes (free text in this section is not part of the commit message):
this patch was re-wrapped and edited during review, so the abbreviated blob
ids on the `index` lines of the edited new files no longer match their
content. Hunk line counts and the diffstat below were updated to match.

 CHANGELOG.md                                |   1 +
 src/bedtools/bedtools_links/config.vsh.yaml |  92 +++++++++++++++++
 src/bedtools/bedtools_links/help.txt        |  25 +++++
 src/bedtools/bedtools_links/script.sh       |  16 +++
 src/bedtools/bedtools_links/test.sh         | 102 +++++++++++++++++++
 5 files changed, 236 insertions(+)
 create mode 100644 src/bedtools/bedtools_links/config.vsh.yaml
 create mode 100644 src/bedtools/bedtools_links/help.txt
 create mode 100644 src/bedtools/bedtools_links/script.sh
 create mode 100644 src/bedtools/bedtools_links/test.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8c1af805..6dda7ab4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@
   - `bedtools/bedtools_merge`: Merges overlapping BED/GFF/VCF entries into a single interval (PR #118).
   - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101).
   - `bedtools/bedtools_bedtobam`: Converts genomic feature records (bed/gff/vcf) to BAM format (PR #111).
+  - `bedtools/bedtools_links`: Creates an HTML file with links to an instance of the UCSC Genome Browser for all features / intervals in a (bed/gff/vcf) file (PR #137).

 * `qualimap/qualimap_rnaseq`: RNA-seq QC analysis using qualimap (PR #74).

diff --git a/src/bedtools/bedtools_links/config.vsh.yaml b/src/bedtools/bedtools_links/config.vsh.yaml
new file mode 100644
index 00000000..b4e43cd3
--- /dev/null
+++ b/src/bedtools/bedtools_links/config.vsh.yaml
@@ -0,0 +1,92 @@
+name: bedtools_links
+namespace: bedtools
+description: |
+  Creates an HTML file with links to an instance of the UCSC Genome Browser for all features / intervals in a file.
+  This is useful for cases when one wants to manually inspect through a large set of annotations or features.
+keywords: [Links, BED, GFF, VCF]
+links:
+  documentation: https://bedtools.readthedocs.io/en/latest/content/tools/links.html
+  repository: https://github.com/arq5x/bedtools2
+  homepage: https://bedtools.readthedocs.io/en/latest/#
+  issue_tracker: https://github.com/arq5x/bedtools2/issues
+references:
+  doi: 10.1093/bioinformatics/btq033
+license: MIT
+requirements:
+  commands: [bedtools]
+authors:
+  - __merge__: /src/_authors/theodoro_gasperin.yaml
+    roles: [ author, maintainer ]
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        alternatives: -i
+        type: file
+        description: Input file (bed/gff/vcf).
+        required: true
+
+  - name: Outputs
+    arguments:
+      - name: --output
+        alternatives: -o
+        type: file
+        direction: output
+        description: Output HTML file to be written.
+        required: true
+
+  - name: Options
+    description: |
+      By default, the links created will point to human (hg18) UCSC browser.
+      If you have a local mirror, you can override this behavior by supplying
+      the --base_url, --organism, and --database options.
+
+      For example, if the URL of your local mirror for mouse MM9 is called:
+      http://mymirror.myuniversity.edu, then you would use the following:
+      --base_url http://mymirror.myuniversity.edu
+      --organism mouse
+      --database mm9
+    arguments:
+      - name: --base_url
+        alternatives: -base
+        type: string
+        description: |
+          The “basename” for the UCSC browser.
+        default: http://genome.ucsc.edu
+
+      - name: --organism
+        alternatives: -org
+        type: string
+        description: |
+          The organism (e.g. mouse, human).
+        default: human
+
+      - name: --database
+        alternatives: -db
+        type: string
+        description: |
+          The genome build.
+        default: hg18
+
+resources:
+  - type: bash_script
+    path: script.sh
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+
+engines:
+  - type: docker
+    image: debian:stable-slim
+    setup:
+      - type: apt
+        packages: [bedtools, procps]
+      - type: docker
+        run: |
+          echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt
+
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/bedtools/bedtools_links/help.txt b/src/bedtools/bedtools_links/help.txt
new file mode 100644
index 00000000..d848d989
--- /dev/null
+++ b/src/bedtools/bedtools_links/help.txt
@@ -0,0 +1,25 @@
+```
+bedtools links -h
+```
+
+Tool:    bedtools links (aka linksBed)
+Version: v2.30.0
+Summary: Creates HTML links to an UCSC Genome Browser from a feature file.
+
+Usage:   bedtools links [OPTIONS] -i <bed/gff/vcf> > out.html
+
+Options:
+    -base   The browser basename. Default: http://genome.ucsc.edu
+    -org    The organism. Default: human
+    -db     The build. Default: hg18
+
+Example:
+    By default, the links created will point to human (hg18) UCSC browser.
+    If you have a local mirror, you can override this behavior by supplying
+    the -base, -org, and -db options.
+
+    For example, if the URL of your local mirror for mouse MM9 is called:
+    http://mymirror.myuniversity.edu, then you would use the following:
+    -base http://mymirror.myuniversity.edu
+    -org mouse
+    -db mm9
diff --git a/src/bedtools/bedtools_links/script.sh b/src/bedtools/bedtools_links/script.sh
new file mode 100644
index 00000000..b8ee9a56
--- /dev/null
+++ b/src/bedtools/bedtools_links/script.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+set -eo pipefail
+
+# Execute bedtools links.
+# Each ${par_x:+-flag "$par_x"} expands to the flag plus its value only when
+# par_x is set and non-empty; otherwise nothing is passed for that option.
+bedtools links \
+  ${par_base_url:+-base "$par_base_url"} \
+  ${par_organism:+-org "$par_organism"} \
+  ${par_database:+-db "$par_database"} \
+  -i "$par_input" \
+  > "$par_output"
diff --git a/src/bedtools/bedtools_links/test.sh b/src/bedtools/bedtools_links/test.sh
new file mode 100644
index 00000000..d79cbd6c
--- /dev/null
+++ b/src/bedtools/bedtools_links/test.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# exit on error
+set -eo pipefail
+
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
+}
+assert_file_not_empty() {
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
+}
+# Match $2 as a fixed string so characters such as '.' are taken literally.
+assert_file_contains() {
+  grep -qF -- "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
+}
+assert_identical_content() {
+  diff -a "$2" "$1" \
+    || { echo "Files are not identical!" && exit 1; }
+}
+#############################################
+
+# Create directories for tests
+echo "Creating Test Data..."
+TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX")
+function clean_up {
+  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
+}
+trap clean_up EXIT
+
+# Create test data; printf repeats the format per row and writes real tabs
+printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
+  chr21 9928613 10012791 uc002yip.1 0 - \
+  chr21 9928613 10012791 uc002yiq.1 0 - \
+  chr21 9928613 10012791 uc002yir.1 0 - \
+  chr21 9928613 10012791 uc010gkv.1 0 - \
+  chr21 9928613 10061300 uc002yis.1 0 - \
+  chr21 10042683 10120796 uc002yit.1 0 - \
+  chr21 10042683 10120808 uc002yiu.1 0 - \
+  chr21 10079666 10120808 uc002yiv.1 0 - \
+  chr21 10080031 10081687 uc002yiw.1 0 - \
+  chr21 10081660 10120796 uc002yix.2 0 - \
+  > "$TMPDIR/genes.bed"
+
+# Test 1: Default Use
+mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null
+
+echo "> Run bedtools_links on BED file"
+"$meta_executable" \
+  --input "../genes.bed" \
+  --output "genes.html"
+
+# checks
+assert_file_exists "genes.html"
+assert_file_not_empty "genes.html"
+assert_file_contains "genes.html" "uc002yip.1"
+echo "- test1 succeeded -"
+
+popd > /dev/null
+
+# Test 2: Base URL
+mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null
+
+echo "> Run bedtools_links with base option"
+"$meta_executable" \
+  --input "../genes.bed" \
+  --output "genes.html" \
+  --base_url "http://mymirror.myuniversity.edu"
+
+# checks
+assert_file_exists "genes.html"
+assert_file_not_empty "genes.html"
+assert_file_contains "genes.html" "uc002yip.1"
+assert_file_contains "genes.html" "http://mymirror.myuniversity.edu"
+echo "- test2 succeeded -"
+
+popd > /dev/null
+
+# Test 3: Organism and Genome Database Build
+mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null
+
+echo "> Run bedtools_links with organism option and genome database build"
+"$meta_executable" \
+  --input "../genes.bed" \
+  --output "genes.html" \
+  --base_url "http://genome.ucsc.edu" \
+  --organism "mouse" \
+  --database "mm9"
+
+# checks
+assert_file_exists "genes.html"
+assert_file_not_empty "genes.html"
+assert_file_contains "genes.html" "uc002yip.1"
+assert_file_contains "genes.html" "org=mouse"
+assert_file_contains "genes.html" "db=mm9"
+echo "- test3 succeeded -"
+
+popd > /dev/null
+
+echo "---- All tests succeeded! ----"
+exit 0