Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into extend_contributing
Browse files Browse the repository at this point in the history
  • Loading branch information
rcannood committed Feb 2, 2024
2 parents 60b9364 + f54af0a commit 8c8ad10
Show file tree
Hide file tree
Showing 28 changed files with 11,871 additions and 0 deletions.
105 changes: 105 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Component Testing workflow.
#
# Three jobs run in sequence:
#   1. run_ci_check_job - decides whether this run should do any work at all.
#   2. list             - builds a JSON matrix of components to test.
#   3. viash_test       - runs `viash test` once per matrix entry.
name: Component Testing

on:
  pull_request:
  push:
    branches: [ '**' ]

jobs:
  # Emits `run_ci`: 'false' when this is a push to a non-main branch that
  # already has an open pull request and the head commit message does not
  # contain 'ci force'; 'true' otherwise.
  run_ci_check_job:
    runs-on: ubuntu-latest
    outputs:
      run_ci: ${{ steps.github_cli.outputs.check }}
    steps:
      - name: 'Check if branch has an existing pull request and the trigger was a push'
        id: github_cli
        # The repository and branch name are read from the runner-provided
        # GITHUB_REPOSITORY / GITHUB_REF_NAME environment variables instead of
        # being pasted into the script with `${{ }}`. Expression values are
        # substituted before the shell parses the script, so a branch name
        # containing shell metacharacters would otherwise be executed.
        # The remaining `${{ }}` expressions only ever render as true/false.
        run: |
          pull_request=$(gh pr list -R "$GITHUB_REPOSITORY" -H "$GITHUB_REF_NAME" --json url --state open --limit 1 | jq '.[0].url')
          # If the branch has a PR and this run was triggered by a push event, do not run
          if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then
            echo "check=false" >> "$GITHUB_OUTPUT"
          else
            echo "check=true" >> "$GITHUB_OUTPUT"
          fi
        env:
          GH_TOKEN: ${{ github.token }}

  # phase 1
  # Produces the `matrix` output consumed by viash_test: a JSON array of
  # {name, config, dir} objects, one per component.
  list:
    needs: run_ci_check_job
    runs-on: ubuntu-latest
    if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }}

    outputs:
      matrix: ${{ steps.set_matrix.outputs.matrix }}

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Subject line of the PR head commit (or of the pushed commit when there
      # is no pull_request payload); used below to detect 'ci force'.
      - name: Get head git commit message
        id: get_head_commit_message
        run: echo "HEAD_COMMIT_MESSAGE=$(git show -s --format=%s ${{ github.event.pull_request.head.sha || github.sha }})" >> "$GITHUB_OUTPUT"

      - uses: viash-io/viash-actions/setup@v5

      - name: Check if all config can be parsed if there is no unicode support
        run: |
          LANG=C viash ns list > /dev/null

      # NOTE(review): no step in this workflow reads
      # `steps.changed-files.outputs.*`; confirm whether this step is still
      # needed before keeping a third-party action in the pipeline.
      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v42
        with:
          separator: ";"
          diff_relative: true

      # The query is a negative lookahead rejecting values that start with
      # `workflows` - presumably matched against the component namespace;
      # confirm against the ns-list action documentation.
      - id: ns_list
        uses: viash-io/viash-actions/ns-list@v5
        with:
          platform: docker
          format: json
          query: ^(?!workflows)

      - id: ns_list_filtered
        uses: viash-io/viash-actions/project/detect-changed-components@v5
        with:
          input_file: "${{ steps.ns_list.outputs.output_file }}"

      # Uses the full component list when the head commit message contains
      # 'ci force', otherwise the filtered list from the previous step.
      - id: set_matrix
        run: |
          echo "matrix=$(jq -c '[ .[] |
            {
              "name": (.functionality.namespace + "/" + .functionality.name),
              "config": .info.config,
              "dir": .info.config | capture("^(?<dir>.*\/)").dir
            }
          ]' ${{ contains(steps.get_head_commit_message.outputs.HEAD_COMMIT_MESSAGE, 'ci force') && steps.ns_list.outputs.output_file || steps.ns_list_filtered.outputs.output_file }} )" >> "$GITHUB_OUTPUT"

  # phase 2
  # Skipped when the matrix is empty ('[]') or was never set ('').
  viash_test:
    needs: list
    if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }}
    runs-on: ubuntu-latest

    strategy:
      # Let the other components finish even if one of them fails.
      fail-fast: false
      matrix:
        component: ${{ fromJson(needs.list.outputs.matrix) }}

    steps:
      # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.'
      - uses: data-intuitive/reclaim-the-bytes@v2

      - uses: actions/checkout@v4

      - uses: viash-io/viash-actions/setup@v5

      - name: Run test
        timeout-minutes: 30
        # The config path is handed over through the environment so that the
        # shell never parses it as script text.
        env:
          COMPONENT_CONFIG: ${{ matrix.component.config }}
        run: |
          viash test \
            "$COMPONENT_CONFIG" \
            --cpus 2 \
            --memory "6gb"
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@

* `fastp`: An ultra-fast all-in-one FASTQ preprocessor (PR #3).

* `busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6).

* `featurecounts`: Assign sequence reads to genomic features (PR #11).

* `bgzip`: Add bgzip functionality to compress and decompress files (PR #13).

* `pear`: Paired-end read merger (PR #10).

## MAJOR CHANGES

## MINOR CHANGES
Expand Down
127 changes: 127 additions & 0 deletions src/bgzip/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Viash component definition wrapping the `bgzip` command-line tool.
functionality:
  name: bgzip
  description: Block compression/decompression utility
  info:
    homepage: https://www.htslib.org/
    documentation: https://www.htslib.org/doc/bgzip.html
    repository: https://github.com/samtools/htslib
    licence: MIT
    reference:
      doi: 10.1093/gigascience/giab007
  requirements:
    commands:
      - bgzip

  argument_groups:
    - name: Inputs
      arguments:
        - name: --input
          type: file
          direction: input
          description: file to be compressed or decompressed
          required: true

    - name: Outputs
      arguments:
        # Receives whatever bgzip writes to stdout (see the script below).
        - name: --output
          type: file
          direction: output
          description: compressed or decompressed output
          required: true
        - name: --index_name
          alternatives: -I
          type: file
          direction: output
          description: name of BGZF index file [file.gz.gzi]

    - name: Arguments
      arguments:
        - name: --offset
          alternatives: -b
          type: integer
          description: decompress at virtual file pointer (0-based uncompressed offset)
        - name: --decompress
          alternatives: -d
          type: boolean_true
          description: decompress the input file
        - name: --rebgzip
          alternatives: -g
          type: boolean_true
          description: use an index file to bgzip a file
        - name: --index
          alternatives: -i
          type: boolean_true
          description: compress and create BGZF index
        - name: --compress_level
          alternatives: -l
          type: integer
          description: compression level to use when compressing; 0 to 9, or -1 for default [-1]
          min: -1
          max: 9
        - name: --reindex
          alternatives: -r
          type: boolean_true
          description: (re)index the output file
        - name: --size
          alternatives: -s
          type: integer
          description: decompress INT bytes (uncompressed size)
          min: 0
        - name: --test
          alternatives: -t
          type: boolean_true
          description: test integrity of compressed file
        - name: --binary
          type: boolean_true
          description: Don't align blocks with text lines

  resources:
    # Wrapper script. Flags whose value is "false" are unset first so that the
    # `${var:+...}` expansions below drop them from the command line. bgzip is
    # always called with -c and its stdout is redirected to --output.
    - type: bash_script
      text: |
        [[ "$par_decompress" == "false" ]] && unset par_decompress
        [[ "$par_rebgzip" == "false" ]] && unset par_rebgzip
        [[ "$par_index" == "false" ]] && unset par_index
        [[ "$par_reindex" == "false" ]] && unset par_reindex
        [[ "$par_test" == "false" ]] && unset par_test
        [[ "$par_binary" == "false" ]] && unset par_binary
        bgzip -c \
          ${meta_cpus:+--threads "${meta_cpus}"} \
          ${par_offset:+-b "${par_offset}"} \
          ${par_decompress:+-d} \
          ${par_rebgzip:+-g} \
          ${par_index:+-i} \
          ${par_index_name:+-I "${par_index_name}"} \
          ${par_compress_level:+-l "${par_compress_level}"} \
          ${par_reindex:+-r} \
          ${par_size:+-s "${par_size}"} \
          ${par_test:+-t} \
          ${par_binary:+--binary} \
          "$par_input" > "$par_output"

  test_resources:
    # Round-trip test: compress test.vcf, decompress the result, and require
    # the decompressed file to be byte-identical to the original.
    - type: bash_script
      text: |
        set -e
        "$meta_executable" --input "$meta_resources_dir/test_data/test.vcf" --output "test.vcf.gz"
        echo ">> Checking output of compressing"
        [ ! -f "test.vcf.gz" ] && echo "Output file test.vcf.gz does not exist" && exit 1
        "$meta_executable" --input "test.vcf.gz" --output "test.vcf" --decompress
        echo ">> Checking output of decompressing"
        [ ! -f "test.vcf" ] && echo "Output file test.vcf does not exist" && exit 1
        echo ">> Checking original and decompressed files are the same"
        set +e
        cmp --silent -- "$meta_resources_dir/test_data/test.vcf" "test.vcf"
        [ $? -ne 0 ] && echo "files are different" && exit 1
        set -e
        echo "> Test successful"
    - type: file
      path: test_data

# Execution platforms for the component.
platforms:
  - type: docker
    image: quay.io/biocontainers/htslib:1.19--h81da01d_0
    setup:
      # Record the bgzip version at image build time as `bgzip: "<version>"`.
      # `2>&1` sits on `bgzip -h` itself so the "Version:" line reaches grep
      # whether the help text is printed on stdout or stderr; attached to
      # grep it would only redirect grep's own diagnostics.
      - type: docker
        run: |
          bgzip -h 2>&1 | grep 'Version:' | sed 's/Version:\s\(.*\)/bgzip: "\1"/' > /var/software_versions.txt
  - type: nextflow
22 changes: 22 additions & 0 deletions src/bgzip/help.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
```bash
bgzip -h
```

Version: 1.19
Usage: bgzip [OPTIONS] [FILE] ...
Options:
-b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)
-c, --stdout write on standard output, keep original files unchanged
-d, --decompress decompress
-f, --force overwrite files without asking
-g, --rebgzip use an index file to bgzip a file
-h, --help give this help
-i, --index compress and create BGZF index
-I, --index-name FILE name of BGZF index file [file.gz.gzi]
-k, --keep don't delete input files during operation
-l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1]
-r, --reindex (re)index compressed file
-s, --size INT decompress INT bytes (uncompressed size)
-t, --test test integrity of compressed file
--binary Don't align blocks with text lines
-@, --threads INT number of compression threads to use [1]
10 changes: 10 additions & 0 deletions src/bgzip/test_data/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# bgzip test data
#
# Source: https://github.com/snakemake/snakemake-wrappers/tree/master/bio/bgzip/test
# The destination path is relative, so run this from the repository root.

wrappers_dir=/tmp/snakemake-wrappers

# Reuse an existing checkout; otherwise make a shallow clone of master.
[ -d "$wrappers_dir" ] || \
  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers "$wrappers_dir"

# Copy the upstream bgzip test files into this component's test_data directory.
cp -r "$wrappers_dir"/bio/bgzip/test/* src/bgzip/test_data

23 changes: 23 additions & 0 deletions src/bgzip/test_data/test.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##fileformat=VCFv4.0
##fileDate=20090805
##source=https://www.internationalgenome.org/wiki/Analysis/vcf4.0/
##reference=1000GenomesPilot-NCBI36
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3
20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4
20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
20 1234567 microsat1 GTCT G,GTACT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3
Loading

0 comments on commit 8c8ad10

Please sign in to comment.