-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from viash-hub/add_bgzip
Add bgzip
- Loading branch information
Showing
4 changed files
with
182 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
functionality: | ||
name: bgzip | ||
description: Block compression/decompression utility | ||
info: | ||
homepage: https://www.htslib.org/ | ||
documentation: https://www.htslib.org/doc/bgzip.html | ||
repository: https://github.com/samtools/htslib | ||
licence: MIT | ||
requirements: | ||
cpus: 1 | ||
commands: [ bgzip ] | ||
argument_groups: | ||
- name: Inputs | ||
arguments: | ||
- name: --input | ||
type: file | ||
direction: input | ||
description: file to be compressed or decompressed | ||
required: true | ||
- name: Outputs | ||
arguments: | ||
- name: --output | ||
type: file | ||
direction: output | ||
description: compressed or decompressed output | ||
required: true | ||
- name: --index_name | ||
alternatives: -I | ||
type: file | ||
direction: output | ||
description: name of BGZF index file [file.gz.gzi] | ||
- name: Arguments | ||
arguments: | ||
- name: --offset | |
alternatives: -b | ||
type: integer | ||
description: decompress at virtual file pointer (0-based uncompressed offset) | ||
- name: --decompress | ||
alternatives: -d | ||
type: boolean_true | ||
description: decompress the input file | ||
- name: --rebgzip | ||
alternatives: -g | ||
type: boolean_true | ||
description: use an index file to bgzip a file | ||
- name: --index | ||
alternatives: -i | ||
type: boolean_true | ||
description: compress and create BGZF index | ||
- name: --compress_level | ||
alternatives: -l | ||
type: integer | ||
description: compression level to use when compressing; 0 to 9, or -1 for default [-1] | ||
min: -1 | ||
max: 9 | ||
- name: --reindex | ||
alternatives: -r | ||
type: boolean_true | ||
description: (re)index the output file | ||
- name: --size | ||
alternatives: -s | ||
type: integer | ||
description: decompress INT bytes (uncompressed size) | ||
min: 0 | ||
- name: --test | ||
alternatives: -t | ||
type: boolean_true | ||
description: test integrity of compressed file | ||
- name: --binary | ||
type: boolean_true | ||
description: Don't align blocks with text lines | ||
resources: | ||
- type: bash_script | ||
text: | | ||
[[ "$par_decompress" == "false" ]] && unset par_decompress | ||
[[ "$par_rebgzip" == "false" ]] && unset par_rebgzip | ||
[[ "$par_index" == "false" ]] && unset par_index | ||
[[ "$par_reindex" == "false" ]] && unset par_reindex | ||
[[ "$par_test" == "false" ]] && unset par_test | ||
[[ "$par_binary" == "false" ]] && unset par_binary | ||
bgzip -c \ | ||
${meta_cpus:+--threads "${meta_cpus}"} \ | ||
${par_decompress:+-d} \ | ||
${par_rebgzip:+-g} \ | ||
${par_index:+-i} \ | ||
${par_index_name:+-I "${par_index_name}"} \ | ||
${par_compress_level:+-l "${par_compress_level}"} \ | ||
${par_reindex:+-r} \ | ||
${par_size:+-s "${par_size}"} \ | ||
${par_test:+-t} \ | ||
"$par_input" > "$par_output" | ||
test_resources: | ||
- type: bash_script | ||
text: | | ||
set -e | ||
"$meta_executable" --input "$meta_resources_dir/test_data/test.vcf" --output "test.vcf.gz" | ||
echo ">> Checking output of compressing" | ||
[ ! -f "test.vcf.gz" ] && echo "Output file test.vcf.gz does not exist" && exit 1 | ||
"$meta_executable" --input "test.vcf.gz" --output "test.vcf" --decompress | ||
echo ">> Checking output of decompressing" | ||
[ ! -f "test.vcf" ] && echo "Output file test.vcf does not exist" && exit 1 | ||
echo ">> Checking original and decompressed files are the same" | ||
set +e | ||
cmp --silent -- "$meta_resources_dir/test_data/test.vcf" "test.vcf" | ||
[ $? -ne 0 ] && echo "files are different" && exit 1 | ||
set -e | ||
echo "> Test successful" | ||
- type: file | ||
path: test_data | ||
|
||
platforms: | ||
- type: docker | ||
image: quay.io/biocontainers/htslib:1.19--h81da01d_0 | ||
setup: | ||
- type: docker | ||
run: | | ||
bgzip -h | grep 'Version:' 2>&1 | sed 's/Version:\s\(.*\)/bgzip: "\1"/' > /var/software_versions.txt | |
- type: nextflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
```bash | ||
bgzip -h | ||
``` | ||
|
||
Version: 1.19 | ||
Usage: bgzip [OPTIONS] [FILE] ... | ||
Options: | ||
-b, --offset INT decompress at virtual file pointer (0-based uncompressed offset) | ||
-c, --stdout write on standard output, keep original files unchanged | ||
-d, --decompress decompress | ||
-f, --force overwrite files without asking | ||
-g, --rebgzip use an index file to bgzip a file | ||
-h, --help give this help | ||
-i, --index compress and create BGZF index | ||
-I, --index-name FILE name of BGZF index file [file.gz.gzi] | ||
-k, --keep don't delete input files during operation | ||
-l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1] | ||
-r, --reindex (re)index compressed file | ||
-s, --size INT decompress INT bytes (uncompressed size) | ||
-t, --test test integrity of compressed file | ||
--binary Don't align blocks with text lines | ||
-@, --threads INT number of compression threads to use [1] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# bgzip test data | |
|
||
Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/bgzip/test. | ||
|
||
__author__ = "William Rowell" | ||
__copyright__ = "Copyright 2020, William Rowell" | ||
__email__ = "[email protected]" | ||
__license__ = "MIT" | ||
|
||
```bash | ||
git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers | ||
cp -r /tmp/snakemake-wrappers/bio/bgzip/test/* src/bgzip/test_data | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
##fileformat=VCFv4.0 | ||
##fileDate=20090805 | ||
##source=https://www.internationalgenome.org/wiki/Analysis/vcf4.0/ | ||
##reference=1000GenomesPilot-NCBI36 | ||
##phasing=partial | ||
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> | ||
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> | ||
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency"> | ||
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele"> | ||
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129"> | ||
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership"> | ||
##FILTER=<ID=q10,Description="Quality below 10"> | ||
##FILTER=<ID=s50,Description="Less than 50% of samples have data"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> | ||
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 | ||
20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. | ||
20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 | ||
20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 | ||
20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 | ||
20 1234567 microsat1 GTCT G,GTACT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 |