-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
35 changed files
with
3,457 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,250 @@ | ||
name: bcftools_annotate | ||
namespace: bcftools | ||
description: | | ||
Add or remove annotations from a VCF/BCF file. | ||
keywords: [Annotate, VCF, BCF] | ||
links: | ||
homepage: https://samtools.github.io/bcftools/ | ||
documentation: https://samtools.github.io/bcftools/bcftools.html#annotate | ||
repository: https://github.com/samtools/bcftools | ||
issue_tracker: https://github.com/samtools/bcftools/issues | ||
references: | ||
doi: https://doi.org/10.1093/gigascience/giab008 | ||
license: MIT/Expat, GNU | ||
requirements: | ||
commands: [bcftools] | ||
authors: | ||
- __merge__: /src/_authors/theodoro_gasperin.yaml | ||
roles: [author] | ||
|
||
argument_groups: | ||
- name: Inputs | ||
arguments: | ||
- name: --input | ||
alternatives: -i | ||
type: file | ||
multiple: true | ||
description: Input VCF/BCF file. | ||
required: true | ||
|
||
- name: Outputs | ||
arguments: | ||
- name: --output | ||
alternatives: -o | ||
direction: output | ||
type: file | ||
description: Output annotated file. | ||
required: true | ||
|
||
- name: Options | ||
description: | | ||
For examples on how to use use bcftools annotate see http://samtools.github.io/bcftools/howtos/annotate.html. | ||
For more details on the options see https://samtools.github.io/bcftools/bcftools.html#annotate. | ||
arguments: | ||
|
||
- name: --annotations | ||
alternatives: --a | ||
type: file | ||
description: | | ||
VCF file or tabix-indexed FILE with annotations: CHR\tPOS[\tVALUE]+ . | ||
- name: --columns | ||
alternatives: --c | ||
type: string | ||
description: | | ||
List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. | ||
See man page for details. | ||
- name: --columns_file | ||
alternatives: --C | ||
type: file | ||
description: | | ||
Read -c columns from FILE, one name per row, with optional --merge_logic TYPE: NAME[ TYPE]. | ||
- name: --exclude | ||
alternatives: --e | ||
type: string | ||
description: | | ||
Exclude sites for which the expression is true. | ||
See https://samtools.github.io/bcftools/bcftools.html#expressions for details. | ||
example: 'QUAL >= 30 && DP >= 10' | ||
|
||
- name: --force | ||
type: boolean_true | ||
description: | | ||
continue even when parsing errors, such as undefined tags, are encountered. | ||
Note this can be an unsafe operation and can result in corrupted BCF files. | ||
If this option is used, make sure to sanity check the result thoroughly. | ||
- name: --header_line | ||
alternatives: --H | ||
type: string | ||
description: | | ||
Header line which should be appended to the VCF header, can be given multiple times. | ||
- name: --header_lines | ||
alternatives: --h | ||
type: file | ||
description: | | ||
File with header lines to append to the VCF header. | ||
For example: | ||
##INFO=<ID=NUMERIC_TAG,Number=1,Type=Integer,Description="Example header line"> | ||
##INFO=<ID=STRING_TAG,Number=1,Type=String,Description="Yet another header line"> | ||
- name: --set_id | ||
alternatives: --I | ||
type: string | ||
description: | | ||
Set ID column using a `bcftools query`-like expression, see man page for details. | ||
- name: --include | ||
type: string | ||
description: | | ||
Select sites for which the expression is true. | ||
See https://samtools.github.io/bcftools/bcftools.html#expressions for details. | ||
example: 'QUAL >= 30 && DP >= 10' | ||
|
||
- name: --keep_sites | ||
alternatives: --k | ||
type: boolean_true | ||
description: | | ||
Leave --include/--exclude sites unchanged instead of discarding them. | ||
- name: --merge_logic | ||
alternatives: --l | ||
type: string | ||
choices: | ||
description: | | ||
When multiple regions overlap a single record, this option defines how to treat multiple annotation values. | ||
See man page for more details. | ||
- name: --mark_sites | ||
alternatives: --m | ||
type: string | ||
description: | | ||
Annotate sites which are present ("+") or absent ("-") in the -a file with a new INFO/TAG flag. | ||
- name: --min_overlap | ||
type: string | ||
description: | | ||
Minimum overlap required as a fraction of the variant in the annotation -a file (ANN), | ||
in the target VCF file (:VCF), or both for reciprocal overlap (ANN:VCF). | ||
By default overlaps of arbitrary length are sufficient. | ||
The option can be used only with the tab-delimited annotation -a file and with BEG and END columns present. | ||
- name: --no_version | ||
type: boolean_true | ||
description: | | ||
Do not append version and command line information to the output VCF header. | ||
- name: --output_type | ||
alternatives: --O | ||
type: string | ||
choices: ['u', 'z', 'b', 'v'] | ||
description: | | ||
Output type: | ||
u: uncompressed BCF | ||
z: compressed VCF | ||
b: compressed BCF | ||
v: uncompressed VCF | ||
- name: --pair_logic | ||
type: string | ||
choices: ['snps', 'indels', 'both', 'all', 'some', 'exact'] | ||
description: | | ||
Controls how to match records from the annotation file to the target VCF. | ||
Effective only when -a is a VCF or BCF file. | ||
The option replaces the former uninuitive --collapse. | ||
See Common Options for more. | ||
- name: --regions | ||
alternatives: --r | ||
type: string | ||
description: | | ||
Restrict to comma-separated list of regions. | ||
Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…]. | ||
example: '20:1000000-2000000' | ||
|
||
- name: --regions_file | ||
alternatives: --R | ||
type: file | ||
description: | | ||
Restrict to regions listed in a file. | ||
Regions can be specified either on a VCF, BED, or tab-delimited file (the default). | ||
For more information check manual. | ||
- name: --regions_overlap | ||
type: string | ||
choices: ['pos', 'record', 'variant', '0', '1', '2'] | ||
description: | | ||
This option controls how overlapping records are determined: | ||
set to 'pos' or '0' if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T); | ||
set to 'record' or '1' if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R, | ||
and includes indels with POS at the end of a region, which are technically outside the region); | ||
or set to 'variant' or '2' to include only true overlapping variation (compare the full VCF representation "TA>T-" vs the true sequence variation "A>-"). | ||
- name: --rename_annotations | ||
type: file | ||
description: | | ||
Rename annotations: TYPE/old\tnew, where TYPE is one of FILTER,INFO,FORMAT. | ||
- name: --rename_chromosomes | ||
type: file | ||
description: | | ||
Rename chromosomes according to the map in file, with "old_name new_name\n" pairs | ||
separated by whitespaces, each on a separate line. | ||
- name: --samples | ||
type: string | ||
description: | | ||
Subset of samples to annotate. | ||
See also https://samtools.github.io/bcftools/bcftools.html#common_options. | ||
- name: --samples_file | ||
type: file | ||
description: | | ||
Subset of samples to annotate in file format. | ||
See also https://samtools.github.io/bcftools/bcftools.html#common_options. | ||
- name: --single_overlaps | ||
type: boolean_true | ||
description: | | ||
Use this option to keep memory requirements low with very large annotation files. | ||
Note, however, that this comes at a cost, only single overlapping intervals are considered in this mode. | ||
This was the default mode until the commit af6f0c9 (Feb 24 2019). | ||
- name: --remove | ||
alternatives: --x | ||
type: string | ||
description: | | ||
List of annotations to remove. | ||
Use "FILTER" to remove all filters or "FILTER/SomeFilter" to remove a specific filter. | ||
Similarly, "INFO" can be used to remove all INFO tags and "FORMAT" to remove all FORMAT tags except GT. | ||
To remove all INFO tags except "FOO" and "BAR", use "^INFO/FOO,INFO/BAR" (and similarly for FORMAT and FILTER). | ||
"INFO" can be abbreviated to "INF" and "FORMAT" to "FMT". | ||
resources: | ||
- type: bash_script | ||
path: script.sh | ||
|
||
test_resources: | ||
- type: bash_script | ||
path: test.sh | ||
|
||
engines: | ||
- type: docker | ||
image: debian:stable-slim | ||
setup: | ||
- type: apt | ||
packages: [bcftools, procps] | ||
- type: docker | ||
run: | | ||
echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools //p')\"" > /var/software_versions.txt | ||
test_setup: | ||
- type: apt | ||
packages: [tabix] | ||
|
||
runners: | ||
- type: executable | ||
- type: nextflow | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
``` | ||
bcftools annotate -h | ||
``` | ||
|
||
annotate: option requires an argument -- 'h' | ||
|
||
About: Annotate and edit VCF/BCF files. | ||
Usage: bcftools annotate [options] VCF | ||
|
||
Options: | ||
-a, --annotations FILE VCF file or tabix-indexed FILE with annotations: CHR\tPOS[\tVALUE]+ | ||
-c, --columns LIST List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details | ||
-C, --columns-file FILE Read -c columns from FILE, one name per row, with optional --merge-logic TYPE: NAME[ TYPE] | ||
-e, --exclude EXPR Exclude sites for which the expression is true (see man page for details) | ||
--force Continue despite parsing error (at your own risk!) | ||
-H, --header-line STR Header line which should be appended to the VCF header, can be given multiple times | ||
-h, --header-lines FILE Lines which should be appended to the VCF header | ||
-I, --set-id [+]FORMAT Set ID column using a `bcftools query`-like expression, see man page for details | ||
-i, --include EXPR Select sites for which the expression is true (see man page for details) | ||
-k, --keep-sites Leave -i/-e sites unchanged instead of discarding them | ||
-l, --merge-logic TAG:TYPE Merge logic for multiple overlapping regions (see man page for details), EXPERIMENTAL | ||
-m, --mark-sites [+-]TAG Add INFO/TAG flag to sites which are ("+") or are not ("-") listed in the -a file | ||
--min-overlap ANN:VCF Required overlap as a fraction of variant in the -a file (ANN), the VCF (:VCF), or reciprocal (ANN:VCF) | ||
--no-version Do not append version and command line to the header | ||
-o, --output FILE Write output to a file [standard output] | ||
-O, --output-type u|b|v|z[0-9] u/b: un/compressed BCF, v/z: un/compressed VCF, 0-9: compression level [v] | ||
--pair-logic STR Matching records by <snps|indels|both|all|some|exact>, see man page for details [some] | ||
-r, --regions REGION Restrict to comma-separated list of regions | ||
-R, --regions-file FILE Restrict to regions listed in FILE | ||
--regions-overlap 0|1|2 Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1] | ||
--rename-annots FILE Rename annotations: TYPE/old\tnew, where TYPE is one of FILTER,INFO,FORMAT | ||
--rename-chrs FILE Rename sequences according to the mapping: old\tnew | ||
-s, --samples [^]LIST Comma separated list of samples to annotate (or exclude with "^" prefix) | ||
-S, --samples-file [^]FILE File of samples to annotate (or exclude with "^" prefix) | ||
--single-overlaps Keep memory low by avoiding complexities arising from handling multiple overlapping intervals | ||
-x, --remove LIST List of annotations (e.g. ID,INFO/DP,FORMAT/DP,FILTER) to remove (or keep with "^" prefix). See man page for details | ||
--threads INT Number of extra output compression threads [0] | ||
|
||
Examples: | ||
http://samtools.github.io/bcftools/howtos/annotate.html | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#!/bin/bash | ||
|
||
## VIASH START | ||
## VIASH END | ||
|
||
# Exit on error | ||
set -eo pipefail | ||
|
||
# Unset parameters | ||
unset_if_false=( | ||
par_force | ||
par_keep_sites | ||
par_no_version | ||
par_single_overlaps | ||
) | ||
|
||
for par in ${unset_if_false[@]}; do | ||
test_val="${!par}" | ||
[[ "$test_val" == "false" ]] && unset $par | ||
done | ||
|
||
# Execute bcftools annotate with the provided arguments | ||
bcftools annotate \ | ||
${par_annotations:+-a "$par_annotations"} \ | ||
${par_columns:+-c "$par_columns"} \ | ||
${par_columns_file:+-C "$par_columns_file"} \ | ||
${par_exclude:+-e "$par_exclude"} \ | ||
${par_force:+--force} \ | ||
${par_header_line:+-H "$par_header_line"} \ | ||
${par_header_lines:+-h "$par_header_lines"} \ | ||
${par_set_id:+-I "$par_set_id"} \ | ||
${par_include:+-i "$par_include"} \ | ||
${par_keep_sites:+-k} \ | ||
${par_merge_logic:+-l "$par_merge_logic"} \ | ||
${par_mark_sites:+-m "$par_mark_sites"} \ | ||
${par_min_overlap:+--min-overlap "$par_min_overlap"} \ | ||
${par_no_version:+--no-version} \ | ||
${par_samples_file:+-S "$par_samples_file"} \ | ||
${par_output_type:+-O "$par_output_type"} \ | ||
${par_pair_logic:+--pair-logic "$par_pair_logic"} \ | ||
${par_regions:+-r "$par_regions"} \ | ||
${par_regions_file:+-R "$par_regions_file"} \ | ||
${par_regions_overlap:+--regions-overlap "$par_regions_overlap"} \ | ||
${par_rename_annotations:+--rename-annots "$par_rename_annotations"} \ | ||
${par_rename_chromosomes:+--rename-chrs "$par_rename_chromosomes"} \ | ||
${par_samples:+-s "$par_samples"} \ | ||
${par_single_overlaps:+--single-overlaps} \ | ||
${par_threads:+--threads "$par_threads"} \ | ||
${par_remove:+-x "$par_remove"} \ | ||
-o $par_output \ | ||
$par_input | ||
|
||
|
||
|
Oops, something went wrong.