From 9889be55e353479eefa58cc3ae5b12349c9e6d1b Mon Sep 17 00:00:00 2001 From: Hendrik Cannoodt Date: Fri, 19 Jan 2024 15:52:03 +0100 Subject: [PATCH 1/4] Quick conversion from snakemake wrapper --- src/bgzip/config.vsh.yaml | 43 +++++++++++++++++++++++++++++++++++++++ src/bgzip/help.txt | 19 +++++++++++++++++ src/bgzip/test/test.vcf | 23 +++++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 src/bgzip/config.vsh.yaml create mode 100644 src/bgzip/help.txt create mode 100644 src/bgzip/test/test.vcf diff --git a/src/bgzip/config.vsh.yaml b/src/bgzip/config.vsh.yaml new file mode 100644 index 00000000..7e98592f --- /dev/null +++ b/src/bgzip/config.vsh.yaml @@ -0,0 +1,43 @@ +functionality: + name: bgzip + description: Block compression/decompression utility + arguments: + - name: --input + type: file + direction: input + description: file to be compressed or decompressed + required: true + - name: --output + type: file + direction: output + description: compressed or decompressed output + required: true + - name: --extra + type: string + description: extra arguments to pass to bgzip + required: false + - name: --threads + type: integer + description: number of threads to use + required: false + default: 1 + # - name: log + # type: file + # direction: output + # description: log file + # required: false + resources: + - type: bash_script + text: | + bgzip -c "$par_extra" --threads "$par_threads" "$par_input" > "$par_output" + test_resources: + - type: bash_script + text: | + "$meta_executable" --input "$meta_resources_dir/test/test.vcf" --output "$meta_resources_dir/test.vcf.gz" + - type: file + path: test + info: + repository: https://github.com/samtools/htslib +platforms: + - type: docker + image: quay.io/biocontainers/htslib:1.19--h81da01d_0 \ No newline at end of file diff --git a/src/bgzip/help.txt b/src/bgzip/help.txt new file mode 100644 index 00000000..ad75f6d1 --- /dev/null +++ b/src/bgzip/help.txt @@ -0,0 +1,19 @@ + +Version: 1.19 +Usage: 
bgzip [OPTIONS] [FILE] ... +Options: + -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset) + -c, --stdout write on standard output, keep original files unchanged + -d, --decompress decompress + -f, --force overwrite files without asking + -g, --rebgzip use an index file to bgzip a file + -h, --help give this help + -i, --index compress and create BGZF index + -I, --index-name FILE name of BGZF index file [file.gz.gzi] + -k, --keep don't delete input files during operation + -l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1] + -r, --reindex (re)index compressed file + -s, --size INT decompress INT bytes (uncompressed size) + -t, --test test integrity of compressed file + --binary Don't align blocks with text lines + -@, --threads INT number of compression threads to use [1] diff --git a/src/bgzip/test/test.vcf b/src/bgzip/test/test.vcf new file mode 100644 index 00000000..11b5400e --- /dev/null +++ b/src/bgzip/test/test.vcf @@ -0,0 +1,23 @@ +##fileformat=VCFv4.0 +##fileDate=20090805 +##source=https://www.internationalgenome.org/wiki/Analysis/vcf4.0/ +##reference=1000GenomesPilot-NCBI36 +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 +20 1230237 . T . 
47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +20 1234567 microsat1 GTCT G,GTACT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 From 3f13bea26ea49314b5a60662513fdb0936fea7a7 Mon Sep 17 00:00:00 2001 From: Hendrik Cannoodt Date: Tue, 23 Jan 2024 15:42:13 +0100 Subject: [PATCH 2/4] reorder the fields a bit and expand the info field --- src/bgzip/config.vsh.yaml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/bgzip/config.vsh.yaml b/src/bgzip/config.vsh.yaml index 7e98592f..a4fe18e3 100644 --- a/src/bgzip/config.vsh.yaml +++ b/src/bgzip/config.vsh.yaml @@ -1,6 +1,14 @@ functionality: name: bgzip description: Block compression/decompression utility + info: + homepage: https://www.htslib.org/ + documentation: https://www.htslib.org/doc/bgzip.html + repository: https://github.com/samtools/htslib + licence: MIT + requirements: + cpus: 1 + commands: [ bgzip ] arguments: - name: --input type: file @@ -21,11 +29,6 @@ functionality: description: number of threads to use required: false default: 1 - # - name: log - # type: file - # direction: output - # description: log file - # required: false resources: - type: bash_script text: | @@ -36,8 +39,7 @@ functionality: "$meta_executable" --input "$meta_resources_dir/test/test.vcf" --output "$meta_resources_dir/test.vcf.gz" - type: file path: test - info: - repository: https://github.com/samtools/htslib + platforms: - type: docker image: quay.io/biocontainers/htslib:1.19--h81da01d_0 \ No newline at end of file From c1043613a0d58195c78044cc8fa05cbbfe0a8a81 Mon Sep 17 00:00:00 2001 From: Hendrik Cannoodt Date: Fri, 26 Jan 2024 10:48:50 +0100 Subject: [PATCH 3/4] improve implementation of regular script and test script --- src/bgzip/config.vsh.yaml | 48 +++++++++++++++++++------- src/bgzip/test_data/README.md | 13 +++++++ src/bgzip/{test => test_data}/test.vcf | 0 3 files changed, 48 insertions(+), 13 deletions(-) create mode 100644 
src/bgzip/test_data/README.md rename src/bgzip/{test => test_data}/test.vcf (100%) diff --git a/src/bgzip/config.vsh.yaml b/src/bgzip/config.vsh.yaml index a4fe18e3..bae1eb20 100644 --- a/src/bgzip/config.vsh.yaml +++ b/src/bgzip/config.vsh.yaml @@ -20,26 +20,48 @@ functionality: direction: output description: compressed or decompressed output required: true - - name: --extra - type: string - description: extra arguments to pass to bgzip - required: false - - name: --threads - type: integer - description: number of threads to use - required: false - default: 1 + - name: --decompress + alternatives: -d + type: boolean_true + description: decompress the input file resources: - type: bash_script text: | - bgzip -c "$par_extra" --threads "$par_threads" "$par_input" > "$par_output" + [[ "$par_decompress" == "false" ]] && unset par_decompress + bgzip -c \ + ${meta_cpus:+--threads "${meta_cpus}"} \ + ${par_decompress:+-d} \ + "$par_input" > "$par_output" test_resources: - type: bash_script text: | - "$meta_executable" --input "$meta_resources_dir/test/test.vcf" --output "$meta_resources_dir/test.vcf.gz" + set -e + + "$meta_executable" --input "$meta_resources_dir/test_data/test.vcf" --output "test.vcf.gz" + + echo ">> Checking output of compressing" + [ ! -f "test.vcf.gz" ] && echo "Output file test.vcf.gz does not exist" && exit 1 + + "$meta_executable" --input "test.vcf.gz" --output "test.vcf" --decompress + + echo ">> Checking output of decompressing" + [ ! -f "test.vcf" ] && echo "Output file test.vcf does not exist" && exit 1 + + echo ">> Checking original and decompressed files are the same" + set +e + cmp --silent -- "$meta_resources_dir/test_data/test.vcf" "test.vcf" + [ $? 
-ne 0 ] && echo "files are different" && exit 1 + set -e + + echo "> Test successful" - type: file - path: test + path: test_data platforms: - type: docker - image: quay.io/biocontainers/htslib:1.19--h81da01d_0 \ No newline at end of file + image: quay.io/biocontainers/htslib:1.19--h81da01d_0 + setup: + - type: docker + run: | + bgzip -h | grep 'Version:' 2>&1 | sed 's/Version:\s\(.*\)/bgzip: "\1"/' > /var/software_versions.txt + - type: nextflow \ No newline at end of file diff --git a/src/bgzip/test_data/README.md b/src/bgzip/test_data/README.md new file mode 100644 index 00000000..ce46295a --- /dev/null +++ b/src/bgzip/test_data/README.md @@ -0,0 +1,13 @@ +# bgzip test data + +Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/bgzip/test. + +__author__ = "William Rowell" +__copyright__ = "Copyright 2020, William Rowell" +__email__ = "wrowell@pacb.com" +__license__ = "MIT" + +```bash +git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers +cp -r /tmp/snakemake-wrappers/bio/bgzip/test/* src/bgzip/test_data +``` \ No newline at end of file diff --git a/src/bgzip/test/test.vcf b/src/bgzip/test_data/test.vcf similarity index 100% rename from src/bgzip/test/test.vcf rename to src/bgzip/test_data/test.vcf From 4a4ed79cfead717b5678e34ec0b5e9e70ec6ffa8 Mon Sep 17 00:00:00 2001 From: Hendrik Cannoodt Date: Tue, 30 Jan 2024 15:53:06 +0100 Subject: [PATCH 4/4] Add extra arguments --- src/bgzip/config.vsh.yaml | 87 ++++++++++++++++++++++++++++++++------- src/bgzip/help.txt | 3 ++ 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/src/bgzip/config.vsh.yaml b/src/bgzip/config.vsh.yaml index bae1eb20..38c7baab 100644 --- a/src/bgzip/config.vsh.yaml +++ b/src/bgzip/config.vsh.yaml @@ -9,28 +9,85 @@ functionality: requirements: cpus: 1 commands: [ bgzip ] - arguments: - - name: --input - type: file - direction: input - description: file to be compressed or
decompressed - required: true - - name: --output - type: file - direction: output - description: compressed or decompressed output - required: true - - name: --decompress - alternatives: -d - type: boolean_true - description: decompress the input file + argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + direction: input + description: file to be compressed or decompressed + required: true + - name: Outputs + arguments: + - name: --output + type: file + direction: output + description: compressed or decompressed output + required: true + - name: --index_name + alternatives: -I + type: file + direction: output + description: name of BGZF index file [file.gz.gzi] + - name: Arguments + arguments: + - name: --offset + alternatives: -b + type: integer + description: decompress at virtual file pointer (0-based uncompressed offset) + - name: --decompress + alternatives: -d + type: boolean_true + description: decompress the input file + - name: --rebgzip + alternatives: -g + type: boolean_true + description: use an index file to bgzip a file + - name: --index + alternatives: -i + type: boolean_true + description: compress and create BGZF index + - name: --compress_level + alternatives: -l + type: integer + description: compression level to use when compressing; 0 to 9, or -1 for default [-1] + min: -1 + max: 9 + - name: --reindex + alternatives: -r + type: boolean_true + description: (re)index the output file + - name: --size + alternatives: -s + type: integer + description: decompress INT bytes (uncompressed size) + min: 0 + - name: --test + alternatives: -t + type: boolean_true + description: test integrity of compressed file + - name: --binary + type: boolean_true + description: Don't align blocks with text lines resources: - type: bash_script text: | [[ "$par_decompress" == "false" ]] && unset par_decompress + [[ "$par_rebgzip" == "false" ]] && unset par_rebgzip + [[ "$par_index" == "false" ]] && unset par_index + [[ "$par_reindex" == "false"
]] && unset par_reindex + [[ "$par_test" == "false" ]] && unset par_test + [[ "$par_binary" == "false" ]] && unset par_binary bgzip -c \ ${meta_cpus:+--threads "${meta_cpus}"} \ ${par_decompress:+-d} \ + ${par_rebgzip:+-g} \ + ${par_index:+-i} \ + ${par_index_name:+-I "${par_index_name}"} \ + ${par_compress_level:+-l "${par_compress_level}"} \ + ${par_reindex:+-r} \ + ${par_size:+-s "${par_size}"} \ + ${par_test:+-t} ${par_binary:+--binary} ${par_offset:+-b "${par_offset}"} \ "$par_input" > "$par_output" test_resources: - type: bash_script diff --git a/src/bgzip/help.txt b/src/bgzip/help.txt index ad75f6d1..d4012efd 100644 --- a/src/bgzip/help.txt +++ b/src/bgzip/help.txt @@ -1,3 +1,6 @@ +```bash +bgzip -h +``` Version: 1.19 Usage: bgzip [OPTIONS] [FILE] ...