From 4efab4f6319546d0d0abf70d9c04c29efc744f2d Mon Sep 17 00:00:00 2001 From: tgaspe Date: Thu, 29 Aug 2024 09:05:44 +0200 Subject: [PATCH] Initial Commit --- src/bcftools/bcftools_norm/config.vsh.yaml | 253 +++++++++++++++++++++ src/bcftools/bcftools_norm/script.sh | 0 src/bcftools/bcftools_norm/test.sh | 0 3 files changed, 253 insertions(+) create mode 100644 src/bcftools/bcftools_norm/config.vsh.yaml create mode 100644 src/bcftools/bcftools_norm/script.sh create mode 100644 src/bcftools/bcftools_norm/test.sh diff --git a/src/bcftools/bcftools_norm/config.vsh.yaml b/src/bcftools/bcftools_norm/config.vsh.yaml new file mode 100644 index 00000000..174e9568 --- /dev/null +++ b/src/bcftools/bcftools_norm/config.vsh.yaml @@ -0,0 +1,253 @@ +name: bcftools_norm +namespace: bcftools +description: | + Left-align and normalize indels, check if REF alleles match the reference, split multiallelic sites into multiple rows; + recover multiallelics from multiple rows. +keywords: [Annotate, VCF, BCF] +links: + homepage: https://samtools.github.io/bcftools/ + documentation: https://samtools.github.io/bcftools/bcftools.html#norm + repository: https://github.com/samtools/bcftools + issue_tracker: https://github.com/samtools/bcftools/issues +references: + doi: https://doi.org/10.1093/gigascience/giab008 +license: MIT/Expat, GNU +requirements: + commands: [bcftools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + multiple: true + description: Input VCF/BCF file. + required: true + + - name: Outputs + arguments: + - name: --output + alternatives: -o + direction: output + type: file + description: Output annotated file. + required: true + + - name: Options + description: | + For examples on how to use use bcftools annotate see http://samtools.github.io/bcftools/howtos/annotate.html. + For more details on the options see https://samtools.github.io/bcftools/bcftools.html#annotate. + arguments: + + - name: --annotations + alternatives: --a + type: file + description: | + VCF file or tabix-indexed FILE with annotations: CHR\tPOS[\tVALUE]+ . + + - name: --columns + alternatives: --c + type: string + description: | + List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. + See man page for details. + + - name: --columns_file + alternatives: --C + type: file + description: | + Read -c columns from FILE, one name per row, with optional --merge_logic TYPE: NAME[ TYPE]. + + - name: --exclude + alternatives: --e + type: string + description: | + Exclude sites for which the expression is true. + See https://samtools.github.io/bcftools/bcftools.html#expressions for details. + + - name: --force + type: boolean_true + description: | + continue even when parsing errors, such as undefined tags, are encountered. + Note this can be an unsafe operation and can result in corrupted BCF files. + If this option is used, make sure to sanity check the result thoroughly. + + - name: --header_line + alternatives: --H + type: string + description: | + Header line which should be appended to the VCF header, can be given multiple times. + + - name: --header_lines + alternatives: --h + type: file + description: | + File with header lines to append to the VCF header. + For example: + ##INFO= + ##INFO= + + - name: --set_id + alternatives: --I + type: string + description: | + Set ID column using a `bcftools query`-like expression, see man page for details. + + - name: --include + type: string + description: | + Select sites for which the expression is true. + See https://samtools.github.io/bcftools/bcftools.html#expressions for details. + + - name: --keep_sites + alternatives: --k + type: boolean_true + description: | + Leave --include/--exclude sites unchanged instead of discarding them. + + - name: --merge_logic + alternatives: --l + type: string + description: | + When multiple regions overlap a single record, this option defines how to treat multiple annotation values. + See man page for more details. + + - name: --mark_sites + alternatives: --m + type: string + description: | + Annotate sites which are present ("+") or absent ("-") in the -a file with a new INFO/TAG flag. + + - name: --min_overlap + type: string + description: | + Minimum overlap required as a fraction of the variant in the annotation -a file (ANN), + in the target VCF file (:VCF), or both for reciprocal overlap (ANN:VCF). + By default overlaps of arbitrary length are sufficient. + The option can be used only with the tab-delimited annotation -a file and with BEG and END columns present. + + - name: --no_version + type: boolean_true + description: | + Do not append version and command line information to the output VCF header. + + - name: --output_type + alternatives: --O + type: string + choices: ['u', 'z', 'b', 'v'] + description: | + Output type: + u: uncompressed BCF + z: compressed VCF + b: compressed BCF + v: uncompressed VCF + + - name: --pair_logic + type: string + choices: ['snps', 'indels', 'both', 'all', 'some', 'exact'] + description: | + Controls how to match records from the annotation file to the target VCF. + Effective only when -a is a VCF or BCF file. + The option replaces the former uninuitive --collapse. + See Common Options for more. + + - name: --regions + alternatives: --r + type: string + description: | + Restrict to comma-separated list of regions. + See man page for details. + + - name: --regions_file + alternatives: --R + type: string + description: | + Restrict to regions listed in a file. + See man page for details. + + - name: --regions_overlap + type: string + choices: ['pos', 'record', 'variant', '1', '2', '3'] + description: | + This option controls how overlapping records are determined: + set to pos or 0 if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T); + set to record or 1 if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R, + and includes indels with POS at the end of a region, which are technically outside the region); + or set to variant or 2 to include only true overlapping variation (compare the full VCF representation "TA>T-" vs the true sequence variation "A>-"). + + - name: --rename_annotations + type: file + description: | + Rename annotations: TYPE/old\tnew, where TYPE is one of FILTER,INFO,FORMAT. + + - name: --rename_chromosomes + type: file + description: | + Rename chromosomes according to the map in file, with "old_name new_name\n" pairs + separated by whitespaces, each on a separate line. + + - name: --samples + type: string + description: | + Subset of samples to annotate. + See also https://samtools.github.io/bcftools/bcftools.html#common_options. + + - name: --samples_file + type: file + description: | + Subset of samples to annotate in file format. + See also https://samtools.github.io/bcftools/bcftools.html#common_options. + + - name: --single_overlaps + type: boolean_true + description: | + Use this option to keep memory requirements low with very large annotation files. + Note, however, that this comes at a cost, only single overlapping intervals are considered in this mode. + This was the default mode until the commit af6f0c9 (Feb 24 2019). + + - name: --threads + type: integer + description: | + Number of extra output compression threads. + See https://samtools.github.io/bcftools/bcftools.html#common_options. + + - name: --remove + alternatives: --x + type: string + description: | + List of annotations to remove. + Use "FILTER" to remove all filters or "FILTER/SomeFilter" to remove a specific filter. + Similarly, "INFO" can be used to remove all INFO tags and "FORMAT" to remove all FORMAT tags except GT. + To remove all INFO tags except "FOO" and "BAR", use "^INFO/FOO,INFO/BAR" (and similarly for FORMAT and FILTER). + "INFO" can be abbreviated to "INF" and "FORMAT" to "FMT". + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bcftools, procps] + - type: docker + run: | + echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools //p')\"" > /var/software_versions.txt + test_setup: + - type: apt + packages: [tabix] + +runners: + - type: executable + - type: nextflow + + diff --git a/src/bcftools/bcftools_norm/script.sh b/src/bcftools/bcftools_norm/script.sh new file mode 100644 index 00000000..e69de29b diff --git a/src/bcftools/bcftools_norm/test.sh b/src/bcftools/bcftools_norm/test.sh new file mode 100644 index 00000000..e69de29b