From 03b8368e394f5e058ce575256fdc65bf727c9053 Mon Sep 17 00:00:00 2001 From: Chris Tomkins-Tinch Date: Wed, 25 Jul 2018 12:02:03 -0400 Subject: [PATCH] add workflows for isnvs_merge_to_vcf (#864) * add workflows for isnvs_merge_to_vcf and isnvs_merge_to_vcf_filtered * consolidate isnv workflows * include multiple alignment as part of isnv merge_to_vcf * isnvs_per_sample: also remove ".mapped" from bam file used to infer sample name * make emaill address optional for isnvs_vcf WDL (and for snpEff) * infer snpEff accessions from ref fasta if not provided * wdl corrections * WIP * chain commands with &&; accession parse correction * set -ex -o pipefail * comment change * update snpEff 4.1l -> 4.3.1t * add snpEff integration test * comment out custom tmpdir test fixtures * try older tmpdir fixtures * replace tmpdir_function fixture with tempfile.gettempdir() * revert; incorporate changes from 0191d68 (@notestaff) * only check final snp_eff tabular output * disable stdout capture by pytest for snpeff test * ignore BOM in open_or_gzopen https://en.wikipedia.org/wiki/Byte_order_mark * typo correction * debug * io open * kwarg reorder * debug * revert utf-8-sig * remove debug * correct git merge butchery * skip test_snpeff for now errors occur on Travis that so far have not been reproducible locally * dev notes * allow passage of stderr handle in run_and_print allow passage of stderr handle in run_and_print, defaulting to stdout redirection as before if not specified * stderr=subprocess.STDOUT default in run_and_print * py27 doesn't have subprocess.DEVNULL (thanks, @notestaff) * correcting late night mistakes * assertAlmostEqual for floats in snpEff test * maintain consistent order of inferred sample name w/r/t isnv files * WIP * add --emailAddress syntax to snpEff snakemake rule * WIP * WIP * remove test skipIf --- DEVELOPMENT_NOTES.md | 1 + conftest.py | 2 +- docker/rundocker.sh | 2 +- illumina.py | 1 - interhost.py | 2 +- intrahost.py | 37 +- pipes/WDL/workflows/isnvs_merge_to_vcf.wdl | 18 + pipes/WDL/workflows/tasks/tasks_intrahost.wdl | 68 +-- pipes/rules/intrahost.rules | 4 +- read_utils.py | 10 +- requirements-conda.txt | 2 +- test/__init__.py | 2 +- test/input/TestSnpEff/RBV16.fasta | 200 +++++++++ test/input/TestSnpEff/ann_eff.txt.gz | Bin 0 -> 911 bytes test/input/TestSnpEff/ann_eff.vcf.gz | Bin 0 -> 1779 bytes test/input/TestSnpEff/ann_eff.vcf.gz.tbi | Bin 0 -> 111 bytes test/input/TestSnpEff/merged.vcf.gz | Bin 0 -> 768 bytes test/input/TestSnpEff/merged.vcf.gz.tbi | Bin 0 -> 109 bytes test/input/TestSnpEff/msa.fasta | 400 ++++++++++++++++++ .../TestSnpEff/ref-rabies-JQ685920.fasta | 200 +++++++++ .../TestSnpEff/vphaser2.RBV16.mapped.txt.gz | Bin 0 -> 510 bytes test/integration/test_intrahost.py | 70 +++ tools/snpeff.py | 55 +-- util/file.py | 2 +- util/misc.py | 8 +- 25 files changed, 981 insertions(+), 103 deletions(-) create mode 100644 pipes/WDL/workflows/isnvs_merge_to_vcf.wdl create mode 100644 test/input/TestSnpEff/RBV16.fasta create mode 100644 test/input/TestSnpEff/ann_eff.txt.gz create mode 100644 test/input/TestSnpEff/ann_eff.vcf.gz create mode 100644 test/input/TestSnpEff/ann_eff.vcf.gz.tbi create mode 100644 test/input/TestSnpEff/merged.vcf.gz create mode 100644 test/input/TestSnpEff/merged.vcf.gz.tbi create mode 100644 test/input/TestSnpEff/msa.fasta create mode 100644 test/input/TestSnpEff/ref-rabies-JQ685920.fasta create mode 100644 test/input/TestSnpEff/vphaser2.RBV16.mapped.txt.gz diff --git a/DEVELOPMENT_NOTES.md b/DEVELOPMENT_NOTES.md index 16e8738b4..b0ad2fefb 100644 --- a/DEVELOPMENT_NOTES.md +++ b/DEVELOPMENT_NOTES.md @@ -20,6 +20,7 @@ When upgrading the GATK to a new version: - in tools/gatk.py change TOOL_VERSION_TUPLE at the top - in travis/install-gatk.sh change GATK_VERSION at the top - in easy-deploy-script/easy-deploy-viral-ngs.sh +- in docker/rundocker.sh ### (Automated) testing [Travis CI](https://travis-ci.org/broadinstitute/viral-ngs) performs automated unit and integration tests for viral-ngs on each branch and pull request. Unit tests are run on each new branch commit, and longer integration tests are performed on pull requests to help ensure the stability of the `master` branch. Pull requests are gated to ensure merging to `master` is allowed only if all tests pass. The Travis configuration is specified in `.travis.yml`, and relies on files stored within `viral-ngs/travis/`. diff --git a/conftest.py b/conftest.py index af55e2930..52860ba64 100644 --- a/conftest.py +++ b/conftest.py @@ -120,4 +120,4 @@ def pytest_terminal_summary(self, terminalreporter, exitstatus): widths = [max(map(len, col)) for col in zip(*rows)] for row in rows: writer.write(" ".join((val.ljust(width) for val, width in zip(row, widths)))) - writer.line() + writer.line() \ No newline at end of file diff --git a/docker/rundocker.sh b/docker/rundocker.sh index f3e014434..66f55e2f4 100755 --- a/docker/rundocker.sh +++ b/docker/rundocker.sh @@ -3,7 +3,7 @@ # A wrapper script to run viral-ngs docker images # The following paths have to be modified according to end-user environment NOVOALIGN_PATH="/opt/novocraft" # Directory where novoalign.lic license file can befound -GATK_PATH="/opt/GenomeAnalysisTK-3.6" # Directory where the correct GATK jar file can be found +GATK_PATH="/opt/GenomeAnalysisTK-3.8" # Directory where the correct GATK jar file can be found IMAGE_HASH_OR_TAG="local/viral-ngs:1.16.0" # This can be found by running this command 'docker images' DATA_DIR="$1"; shift GID=$(id -g $USER) diff --git a/illumina.py b/illumina.py index fefa3ee7f..6ac812617 100755 --- a/illumina.py +++ b/illumina.py @@ -553,7 +553,6 @@ def get_flowcell(self): log.warn("The provided flowcell ID is longer than 15 characters. Is that correct?") return fc - @util.misc.memoize def _get_rundate_obj(self): """ Access the text of the node in the RunInfo.xml file diff --git a/interhost.py b/interhost.py index 935447eec..84910b415 100755 --- a/interhost.py +++ b/interhost.py @@ -328,7 +328,7 @@ def parser_snpEff(parser=argparse.ArgumentParser()): parser.add_argument("inVcf", help="Input VCF file") parser.add_argument("genomes", nargs='+', help="genome name (snpEff db name, or NCBI accessions)") parser.add_argument("outVcf", help="Output VCF file") - parser.add_argument("emailAddress", + parser.add_argument("--emailAddress", help="""Your email address. To access the Genbank CoreNucleotide database, NCBI requires you to specify your email address with each request. In case of excessive usage of the E-utilities, NCBI will attempt to contact diff --git a/intrahost.py b/intrahost.py index de5615f40..1d7c6d89e 100755 --- a/intrahost.py +++ b/intrahost.py @@ -485,7 +485,31 @@ def merge_to_vcf( guessed_samples = samplenames_from_isnvs + list(samplenames_from_alignments-(refnames|set(samplenames_from_isnvs))) log.info("guessed sample names %s" % guessed_samples) - samples = samples or guessed_samples + samples = samples if samples is not None and len(samples)>0 else guessed_samples + + samp_to_isnv = {} + # if we had to guess sample names, match them up to isnv files + if len(guessed_samples)>0: + matched_samples = [] + matched_isnv_files = [] + for sample in samples: + sample_found=False + for isnvs_file in isnvs: + for row in util.file.read_tabfile(isnvs_file): + if sample==sampleIDMatch(row[0]): + samp_to_isnv[sample] = isnvs_file + sample_found=True + matched_samples.append(sample) + matched_isnv_files.append(isnvs_file) + break + if sample_found: + break + samples = matched_samples + isnvs = matched_isnv_files + else: + samp_to_isnv = dict(zip(samples, isnvs)) + + log.info(samp_to_isnv) # get IDs and sequence lengths for reference sequence with util.file.open_or_gzopen(refFasta, 'r') as inf: @@ -567,13 +591,7 @@ def merge_to_vcf( # to the assemblies # if we had to guess samples only check that the number of isnv files == number of alignments - if len(guessed_samples)>0: - if not (number_of_aligned_sequences - 1) == num_isnv_files: - raise LookupError( - """The number of isnv files provided (%s) and must equal the number of sequences - seen in the alignment (%s) (plus an extra reference record in the alignment). - %s does not have the right number of sequences""" % (num_isnv_files,number_of_aligned_sequences - 1,fileName)) - else: + if len(guessed_samples)==0: if not (number_of_aligned_sequences - 1) == num_isnv_files == len(samples): raise LookupError( """The number of isnv files provided (%s) and must equal the number of sequences @@ -581,8 +599,6 @@ def merge_to_vcf( as well as the number of sample names provided (%s) %s does not have the right number of sequences""" % (num_isnv_files,number_of_aligned_sequences - 1,len(samples),fileName)) - samp_to_isnv = dict(zip(samples, isnvs)) - # one reference chrom at a time with open(refFasta, 'r') as inf: for ref_sequence in Bio.SeqIO.parse(inf, 'fasta'): @@ -611,6 +627,7 @@ def merge_to_vcf( for sampleName in samplesToUse: if seq.id == sampleName: samp_to_seqIndex[sampleName] = seq.seq.ungap('-') + break if not len(samp_to_seqIndex) == len(samplesToUse): raise LookupError( diff --git a/pipes/WDL/workflows/isnvs_merge_to_vcf.wdl b/pipes/WDL/workflows/isnvs_merge_to_vcf.wdl new file mode 100644 index 000000000..e0c68e19d --- /dev/null +++ b/pipes/WDL/workflows/isnvs_merge_to_vcf.wdl @@ -0,0 +1,18 @@ +import "tasks_interhost.wdl" as interhost +import "tasks_intrahost.wdl" as tasks_intrahost + +workflow isnvs_merge_to_vcf { + File reference_fasta + Array[File]+ assemblies_fasta # one per genome + + call interhost.multi_align_mafft_ref as mafft { + input: + reference_fasta = reference_fasta, + assemblies_fasta = assemblies_fasta + } + call tasks_intrahost.isnvs_vcf { + input: + perSegmentMultiAlignments = mafft.alignments_by_chr, + reference_fasta = reference_fasta + } +} diff --git a/pipes/WDL/workflows/tasks/tasks_intrahost.wdl b/pipes/WDL/workflows/tasks/tasks_intrahost.wdl index 43ee4e429..03e878f01 100644 --- a/pipes/WDL/workflows/tasks/tasks_intrahost.wdl +++ b/pipes/WDL/workflows/tasks/tasks_intrahost.wdl @@ -7,7 +7,7 @@ task isnvs_per_sample { Int? minReadsPerStrand Int? maxBias - String sample_name = basename(basename(mapped_bam, ".bam"), ".all") + String sample_name = basename(basename(basename(mapped_bam, ".bam"), ".all"), ".mapped") command { intrahost.py vphaser_one_sample \ @@ -29,61 +29,31 @@ task isnvs_per_sample { } } + task isnvs_vcf { Array[File] vphaser2Calls # vphaser output; ex. vphaser2.${sample}.txt.gz Array[File] perSegmentMultiAlignments # aligned_##.fasta, where ## is segment number File reference_fasta - Array[String] snpEffRef # list of accessions to build/find snpEff database + Array[String]? snpEffRef # list of accessions to build/find snpEff database Array[String]? sampleNames # list of sample names - String emailAddress # email address passed to NCBI if we need to download reference sequences + String? emailAddress # email address passed to NCBI if we need to download reference sequences + Boolean naiveFilter=false command { + set -ex -o pipefail + SAMPLES="${sep=' ' sampleNames}" if [ -n "$SAMPLES" ]; then SAMPLES="--samples $SAMPLES"; fi - intrahost.py merge_to_vcf \ - ${reference_fasta} \ - isnvs.vcf.gz \ - $SAMPLES \ - --isnvs ${sep=' ' vphaser2Calls} \ - --alignments ${sep=' ' perSegmentMultiAlignments} \ - --strip_chr_version \ - --parse_accession - - interhost.py snpEff \ - isnvs.vcf.gz \ - ${sep=' ' snpEffRef} \ - isnvs.annot.vcf.gz \ - ${emailAddress} - - intrahost.py iSNV_table \ - isnvs.annot.vcf.gz \ - isnvs.annot.txt.gz - } - - output { - Array[File] isnvFiles = ["isnvs.vcf.gz", "isnvs.vcf.gz.tbi", "isnvs.annot.vcf.gz", "isnvs.annot.txt.gz", "isnvs.annot.vcf.gz.tbi"] - } - runtime { - memory: "4 GB" - docker: "quay.io/broadinstitute/viral-ngs" - } -} - -task isnvs_vcf_filtered { - Array[File] vphaser2Calls # vphaser output; ex. vphaser2.${sample}.txt.gz - Array[File] perSegmentMultiAlignments # aligned_##.fasta, where ## is segment number - File reference_fasta + providedSnpRefAccessions="${sep=' ' snpEffRef}" + if [ -n "$providedSnpRefAccessions" ]; then + snpRefAccessions="$providedSnpRefAccessions"; + else + snpRefAccessions="$(python -c "from Bio import SeqIO; print(' '.join(list(s.id for s in SeqIO.parse('${reference_fasta}', 'fasta'))))")" + fi - Array[String] snpEffRef # list of accessions to build/find snpEff database - Array[String]? sampleNames # list of sample names - String emailAddress # email address passed to NCBI if we need to download reference sequences - Boolean naiveFilter - - command { - SAMPLES="${sep=' ' sampleNames}" - if [ -n "$SAMPLES" ]; then SAMPLES="--samples $SAMPLES"; fi + echo "snpRefAccessions: $snpRefAccessions" intrahost.py merge_to_vcf \ ${reference_fasta} \ @@ -92,18 +62,18 @@ task isnvs_vcf_filtered { --isnvs ${sep=' ' vphaser2Calls} \ --alignments ${sep=' ' perSegmentMultiAlignments} \ --strip_chr_version \ - ${'--naive_filter' + naiveFilter} \ + ${true="--naive_filter" false="" naiveFilter} \ --parse_accession - + interhost.py snpEff \ isnvs.vcf.gz \ - ${sep=' ' snpEffRef} \ + $snpRefAccessions \ isnvs.annot.vcf.gz \ - ${emailAddress} + ${'--emailAddress=' + emailAddress} intrahost.py iSNV_table \ isnvs.annot.vcf.gz \ - isnvs.annot.txt.gz \ + isnvs.annot.txt.gz } output { diff --git a/pipes/rules/intrahost.rules b/pipes/rules/intrahost.rules index 97792728d..b2113edd9 100644 --- a/pipes/rules/intrahost.rules +++ b/pipes/rules/intrahost.rules @@ -56,7 +56,7 @@ rule isnvs_vcf: UGER = config.get('UGER_queues', {}).get('short', '-l h_rt=04:00:00'), logid = "all", snpEff_ref = " ".join(config["accessions_for_ref_genome_build"]), - email_address = config["email_point_of_contact_for_ncbi"] + email_address = "--emailAddress "+config["email_point_of_contact_for_ncbi"] if config["email_point_of_contact_for_ncbi"] else "" run: shell("{config[bin_dir]}/intrahost.py merge_to_vcf {input.ref_genome} {output.raw_vcf}" + " --samples " + " ".join(read_samples_file(config["samples_assembly"])) @@ -92,7 +92,7 @@ rule isnvs_vcf_filtered: UGER = config.get('UGER_queues', {}).get('short', '-l h_rt=04:00:00'), logid = "all", snpEff_ref = " ".join(config["accessions_for_ref_genome_build"]), - emailAddress = config["email_point_of_contact_for_ncbi"], + email_address = "--emailAddress "+config["email_point_of_contact_for_ncbi"] if config["email_point_of_contact_for_ncbi"] else "", naiveFilter = "--naive_filter" if config["vcf_merge_naive_filter"] else "" run: shell("{config[bin_dir]}/intrahost.py merge_to_vcf {input.ref_genome} {output.raw_vcf}" diff --git a/read_utils.py b/read_utils.py index ab638a61d..969c7bd38 100755 --- a/read_utils.py +++ b/read_utils.py @@ -754,11 +754,11 @@ def main_reheader_bam(args): ''' Copy a BAM file (inBam to outBam) while renaming elements of the BAM header. The mapping file specifies which (key, old value, new value) mappings. For example: - LB lib1 lib_one - SM sample1 Sample_1 - SM sample2 Sample_2 - SM sample3 Sample_3 - CN broad BI + LB lib1 lib_one + SM sample1 Sample_1 + SM sample2 Sample_2 + SM sample3 Sample_3 + CN broad BI ''' # read mapping file mapper = dict((a + ':' + b, a + ':' + c) for a, b, c in util.file.read_tabfile(args.rgMap)) diff --git a/requirements-conda.txt b/requirements-conda.txt index 0ac3ecd0c..a760dca8c 100644 --- a/requirements-conda.txt +++ b/requirements-conda.txt @@ -22,7 +22,7 @@ picard=2.18.9 pigz=2.4 prinseq=0.20.4 samtools=1.7 -snpeff=4.1l +snpeff=4.3.1t spades=3.11.1 tbl2asn=25.6 trimmomatic=0.38 diff --git a/test/__init__.py b/test/__init__.py index d9489fe4f..c85b8866d 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -118,4 +118,4 @@ def inputs(self, *fnames): def assert_none_executable(): testDir = os.path.dirname(__file__) assert all(not os.access(os.path.join(testDir, filename), os.X_OK) for filename in os.listdir(testDir) - if filename.endswith('.py')) + if filename.endswith('.py')) \ No newline at end of file diff --git a/test/input/TestSnpEff/RBV16.fasta b/test/input/TestSnpEff/RBV16.fasta new file mode 100644 index 000000000..f87504252 --- /dev/null +++ b/test/input/TestSnpEff/RBV16.fasta @@ -0,0 +1,200 @@ +>RBV16-1 +ACAAAATCAGAGAAGAAGTAGACAGTATCATCTACAAAAAGAAAATGTAACACCTCTACA +ATGGATACCGACAAAATTGTATTCAAAGTCAATAACCAGGTTGTCTCTCTAAAACCTGAA +ATTATTGTAGATCAATATGAGTACAAATACCCGGCTATCAAAGACTTAAAAAAGCCCAGT +ATCTCCCTGGGAAAAGCTCCTGATTTGAACAAGGCGTATAAGTCAATTTTGTCCGGTATG +AATGCAGCTAAGCTCGACCCTGACGATGTGTGCTCTTACTTGGCAGCTGCAATGCAGTTC +TTTGAAGGAACATGTCCAGAAGACTGGACTAGCTATGGAATCTTGATTGCAAGGAAAGGA +GACAAGATAACTCCAAACTCTCTCGTAGACATAAAACGTACAGATGTAGAAGGGAACTGG +GCTCTAACAGGAGGAATGGAGTTGACTAGGGATCCCACCATTCCAGAACATGCATCTTTG +GTTGGTCTTCTCCTGAGTTTGTATCGATTGAGCAAAATATCCGGACAGAACACAGGCAAT +TATAAGACAAATATCTCTGATCGTATAGAGCAGATTTTTGAGACGGCCCCCTTTGTGAAG +ATCGTGGAACATCACACTTTGATGACAACTCACAAAATGTGCGCTAACTGGAGCACCATA +CCGAACTTTAGATTCCTAGCCGGAACTTATGACATGTTTTTCTCTCGGATTGAACATCTA +TATTCAGCAATCAGAGTGGGTACAGTTGTCACTGCTTACGAGGACTGCTCAGGGCTAGTA +TCCTTTACAGGTTTTATAAAGCAGATAAACCTTACAGCAAAGGAAGCAATACTTTATTTC +TTCCACAAAAATTTTGAGGGAGAGATAAGAAGAATGTTTGAGCCGGGACAGGAAACTGCA +GTCCCTCACTCCTATTTCATCCATTTCCGGTCTTTGGGCCTTAGTGGGAAATCTCCATAT +TCGTCAAGTGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATGCTATATGGGT +CAAGTGAGGTCTTTGAATGCAACGGTTATTGCCACATGTGCTCCACATGAGATGTCTGTT +CTCGGGGGTTATTTGGGGGAGGAGTTCTTTGGCAAGGGGACTTTTGAGAGAAGATTCTTC +AGAAACGAGAAGGAACTTCAGGACTATGAAGCAGCAGAGTTGACAAAGACTGAGGTCGCT +CTGGCAGACGACGGAACAGTCAATTCTGACGATGAAGACTACTTCTCTGGTGAAACCAGG +AGTCCAGAGGCGGTCTATACTCGGATCATGATCAATGGGGGCCGACTCAAAAGATCACAT +ATAAGAAGGTATGTATCAGTCAGTTCCAATCATCAAGCTCGCCCCAATTCATTTGCTGAA +TTTCTAAACAAGACATATTCTAACGACCCGTAGGGAGTCGAACTTCAAGATTGTCAACAA +TAATAAATTGTTTAATTCCTCCACGAAAAAAACTAACACCCCTCCTTTTGAACCATCCCA +AGCATGAGCAAGATTTTTGTCAACCCAAGTGCGATCCGGGCCGGCCTTGCTGACCTAGAG +ATGGCAGAGGAGACTGTAGATCTGATTGCCAGGAACATCGAGGACAATCAGGCTCATCTC +CAAGGAGAGCCTATAGAAGTAGATAGTCTGCCCGATGACATGAGACGGCTCCACTTGGAT +AGTGAAAAACCGTCTGGTTTTGACAAGGTGACAAAAGAAGGGGAGAGCAAGTGTCATGAA +GACTTTCAGATGGATGAAGGGGAGGACCCCAGCCTCTTGTTCCAGTCATACCTGGATAAT +GTCGGAGTTCAAATAGTCAGGCAAATGAGGTCAGGAGAGAGATTCCTTAAAATATGGTCT +CAGACTGTGGAGGAGATCATATCCTATGTCATGATCAATTTCCCAGGCTCTTTGGGGAGG +CCTTCTGAAGACAAGGCCACTCAAACTGCCAATCGGGAACCCAAGAAAGGAGTGGCATCA +GTTTCGTCTCAACTCGAAGGTCAATCATCCAAAGCGAGAGCGGCAGCCCAAACTGCCTCC +GGTCCACCTGCCCTAGAATGGTCTGCCACCAACGAAGAAGACGATTTGTCTGTGGAGGCA +GAGATAGCTCATCAAATTGCCGAGAGCTTCTCCAAGAAGTACAAATTCCCCTCTCGATCA +TCAGGGATATTCTTGTATAATTTTGAGCAGTTAAAGATGAACCTTGATGACATAGTCAAG +GAATCAAAGAATGTGCCTGGTGTTACCCGCTTGGCCCATGAAGGGTCTAAACTCCCTCTA +AGATGTGTACTTGGGTGGGTTGCTCAAGCCAACTCCAGAAAGTTCCAGCTGCTGGTCGAG +CCTGACAAGCTAAATAAAATAATGCAAGACGACCTAAACCGTTATTTGTCTCGTCAACTA +AATTTGTAGCCTCAGTCCCATTATGCAATCAAAACCAGTCTGATCCAAAGTCAACGTGAA +AAAAACAGGCAACACCACTAATACAATGAATTTCCTACGCAAGATAGTGAAGAACTGTAG +AGATGAGGACGATCAGAAGCCTTCTCTCGCATCGGCCCCCCCAGATGATGATGATCTGTG +GTTGCCCCCTCCAGAATATGTCCCATTGAAGGAGCTCACAGGGAAGAAGAACATGAGAAA +CTTCTGTGTTAATGGGGAGGTCAAGGTGTGTAGTCCAAACGGATATTCCTTTAGGATCTT +GCGTCACATTCTGAAATCGTTCGATGAGATCTATTCCGGAAATCAAAGAATGATTGGGTT +AGTCAAAGTTGTGGTCGGATTGGCCTTGTCTGGAGCCCCAGTCCCCGAGGGCATGAACTG +GGTTTACAAACTAAGGAGAACTCTTATTTTCCAATGGGCAGACTCTAGGGGTCCTCTGGA +AGGGGAAGAGTTGGAGTACTCTCAGGAAATTACCTGGGACGATGACACTGAATTTGTAGG +GTTGCAAATAAGGGTGAGTGCTAGACAATGCCATATCCAAGGTAGAATCTGGTGCATTAA +CATGAACTCTAGAGCATGTCAATTGTGGTCTGATATGTCCCTTCAAACTCATAGATCTGA +GGAGGACAAAGACTCCTCAGTCCTTCTAGAGTAGTCGAATTATATCTCACAAGTTCCTCA +ATTGTCCACCTCTGGAGGAGAGAACACATGGGCTCAACTCCAACCTTCAGGAGCAATAGA +ACAAAAACATGTTATGGTGCCGTTGAATCGCTGCATTTTATCAGAGTCAAATCAATTACA +ATTGCACTTTAAGCCTCTTGGATGTGAAAAAAACTATTAACATCCCTCAAAAGACCTGAG +GAAAGATGGTCCCTTGGGCCCTTCTGTTTGTGCCTTTTCTGATCTCTTCATTGTGTTTTG +GGAAGTTCCCTATCTACACAATACCAGACAAGCTAGGTCCTTGGAGTCCTATCGACATAC +ATCATCTCAGTTGTCCCAATAATCTTGTTGTGGAAGACGAGGGGTGTAATAGTCTGTCGG +GGTTTTCTTACATGGAACTAAAAGTGGGATACATCTCTGCCATAAAAGTGAACGGGTTCA +CTTGTACTGGTGTCGTGACGGAAGCCGAGACCTACACTAACTTCGTCGGTTATGTCACCA +CCACATTCAAGAGAAAACACTTTCGCCCGATGCCCGATGCATGTAGATCCGCATACAATT +GGAAAATGGCAGGCGATCCTAGGTATGAAGAGTCCCTTCACAACCCATACCCTGATTATC +ATTGGCTGCGGACAGTTAAAACCACCAAGGAGTCTCTTATCATCATCTCGCCAAGTGTGG +CTGACCTAGACCCGTATGACAAATCCCTTCATTCAAGGATTTTTCCTGGGGGGAAATGCA +CGGGTCTAACAGTCTCTTCCACCTACTGCTCGACCAACCATGACTACACCATCTGGATGC +CTGAAAAAGCAGGGCTCGGGACATCTTGTGACATCTTCACCAATAGTAAAGGGAAGAGAG +CATCTAAAGGAGGCAAGACTTGTGGATTTGTGGACGAGAGAGGTTTGTATAAGTCCTTGA +AAGGAGCTTGTAAGCTCAAACTGTGCGGAGTTTCTGGGCTTAGACTTATGGATGGAACTT +GGGTTGCGATTCAGACATTGGATGAAACCAAATGGTGCTCTCCTGATCAACTGGTGAATC +TGCATGACTTCCACTCGGATGAGCTTGAGCATCTTGTTGTAGAGGAGTTGGTTAGAAAGA +GGGAGGAATGTCTGGATGCATTAGAATCCATCATGACCACCAAATCAGTAAGCTTCAGAC +GTCTAAGCCACCTGAGAAAACTAGTTCCTGGGTTTGGGAAGGCATACACCATATTCAACA +AAACCCTAATGGAGGCCGATGCCCACTACAAGTCCGTTCGGACTTGGAGCGAGATCATCC +CCTCAAAAGGGTGTTTGAGAGTAGGGGGGAGATGTCATCCTCATGTAAATGGAGTATTTT +TCAATGGCATCATTCTAGGTCCGGACGGGCATGTGTTGATCCCAGAAATGCAGTCATCCC +TTCTCCAACAACATGTGGAACTGTTGGAGTCCTCTGTGATCCCCCTCATGCACCCCCTGG +CAGATCCTTCAGCAGTTTTCAAAGATGGTGACGAGGCAGAGGATTTTGTTGAGGTTCATC +TCCCAGATGTTCACAAACAGATCTCAGGGGTGGACCTAGGCCTCCCAAGCTGGGGAAAGT +ATATGCTGATGATTGCAGGTGCTCTAACGACTCTAATGCTGTTCATCTTCTTGATGACAT +GTTGCAAAAGAGTCAACAGGACAAAGTCAATACGACAAGGCCCCAGAGAGATGGAAAGAA +AAGTATCATTTACTCCCCAAAACAAAAAAGTCGTATCTTCATGGGAGTCTTACAAGAGCG +GAGGCGAGACCAAGCTGTAAGGAGGGCTGCCCCCCCCACACACTATGCTCGGAAAACTGT +TCCCCTCTGATACATGGAAGAATGTCCTGCCTTGACAATCCCCCACGGACTCTGTCCAAC +AGGGTAAATTTCGAAGTCAAGAAACTTTCATCGATCATCTCACTCGACCAGACACATTCA +GGTGGACTTTGATGATGTATGAAGCATTTTTACAGTATCAGCGACTAATGGTGCTCTCAC +CCTCTAAGGACTGGTACTAAAGGTAGCGGACAGGCTGACTGACATCTCAGACAACCCTGT +TTCTTAGCTTGGGCAGAGGTTGTGATAAGCTCCTCTACCTTAAACTAAAACAGTGATCAG +CTGAGAAAACTGATTTGCCTCCTATGAAGGACACAAGCAATAGATCACAATCATCTCACA +TCCCAATAAGTTGTGCATAACTACAAAGGGCTGGGCCATCTAATCTCCTCAATCAAGAAA +AAAACTGTGAGACAGAGAAATCCTCAACAACACTTCTCATCCTGAACACTGCACCATGAT +GATTGATCCAGGGGAGGTTTATGATGACCCCATTGATCCAGTCGAGTCTGAGAGTGAGCC +AAGAGGAAACTCCAATATCCCCAACATCTTGCGAAATTCTGACTACAATCTCAACTCCCC +TCTGATCGAGGATTCAGCCAAGCTGATGTTAGAATGGCTGAAAACAGGAAATAGACCTCT +CCGGATGACCCTGACCGACAATTGTTCTCGGTCTTACAAAGTTCTGAAAGATTACCTCAA +GAAAGTGGATCTAGGATCTCTCAAAGTGGGCGGAGCTGCAGCACAATCTATGATCTCTCT +TTGGTTATATGGTGCTCACTCTGAATCAAATAGGAGCAGGAGGTGTATGACTGACTTAGC +TCAATTCTACTCCAAATCTTCCCCTATAGAGAAGCTGTTAAACTTCACACTCGGGAACAG +AGGGCTGAGAATCCCCCCTGAGGGAGTCTTAAGCTGTCTTGAGAGGGTGGATTACGATAA +GGCATTTGGGAGGTATTTGGCCAATATATATTCCTCCTATTTGTTCTTTCATGTGATTAC +CCTTTACATGAATGCCCTAGATTGGGATGAGGAGAAGACTATCTTGGCACTGTGGAGGGA +ATTGACATCAGTGGATATGGGAAAGGACTTGGTCAAGTTTAAAGATCAGATATGGGGACT +TCTGATTGTGACTAAAGACTTCATATATTCACAAAGCTCTCACTGTCTCTTTGACAGGAA +CTATACGCTTATGCTAAAAGACCTTTTTTTGTCCCGGTTCAACTCTCTGATGATTTTGCT +GTCCCCCCCGGAACCCAGATACTCAGATGACTTGATATCTCAGCTGTGTCAGCTGTACAT +AGCAGGAGATCAAGTGTTGTCCATGTGCGGAAACTCTGGTTATGAAGTCATAAAAATCTT +GGAGCCGTATGTTGTAAACAGCTTGGTCTATAGAGCGGAAAAATTCAGGCCTCTTATTCA +CTCTCTGGGGGACTTCCCTGTTTTTATAAAGGACAAGGTGAATCAACTTGAGGGGACGTT +TGGTCCTAGTGCGAAGAGATTCTTTAAAGTCTTGGATCAATTCGACAACATACATGATTT +GGTCTTTGTATACGGTTGTTACAGGCATTGGGGGCACCCTTACATAGACTATAGAAAGGG +TCTGTCAAAATTGTATGACCAGGTCCACATTAAAAAGGTGATAGATAAGTCCTATCAGGA +GTGTTTAGCGAGTGACCTGGCCAAAAGAATCCTCAGGTGGGGGTTCGACAAATACTCCAA +ATGGTATCTAGATCCACGACTTCTTGCGAGGGATCACCCCCTGACTCCTTATATCAAGAC +CCAGACATGGCCTCCCAAGCACATAGTGGATTTGGTAGGTGATACCTGGCATAGACTCCC +GATCACCCAGATCTTCGAGATCCCTGAATCAATGGACCCATCAGAGATATTAGATGACAA +ATCACACTCTTTCACTAGAACAAGGCTAGCGTCATGGCTTTCGGAAAACAGAGGAGGACC +GGTTCCAAGTGAGAAGGTCATTATTACGGCTCTCTCCAAACCGCCTGTCAACCCTAGAGA +GTTTTTGAAATCTATAGACCTTGGGGGGCTGCCAGACGAAGACTTAATCATCGGCTTGAA +ACCCAAAGAAAGGGAGCTGAAGATCGAAGGCCGATTTTTTGCTTTAATGTCTTGGAATTT +AAGGCTATACTTCGTTATTACAGAGAAGCTCCTGGCCAATTATATATTACCACTCTTTGA +TGCATTAACCATGACCGACAATTTGAACAAGGTGTTCAAAAAGTTGATTGATCGAGTCAC +CGGACAAGGGCTCTTAGATTATTCGAGGGTTACATATGCTTTTCACTTGGACTATGAGAA +GTGGAACAATCATCAGAGACTAGAATCGACTGAGGATGTATTCTCCGTCCTTGATCAAGT +GTTTGGATTAAAAAGGGTGTTCTCCAGAACTCATGAGTTTTTCCAGAAGTCTTGGATATA +TTACTCAGATAGATCTGATCTTATAGGATTGTGGGAGGACCAGATATACTGTTTGGACAT +GTCAAATGGCCCGACGTGCTGGAACGGCCAAGATGGCGGGTTAGAGGGGTTACGACAGAA +GGGCTGGAGTCTGGTCAGCCTACTGATGATAGATCGAGAGTCTCAGACCAGGAACACAAG +AACTAAGATACTAGCCCAGGGAGACAACCAAGTTCTATGTCCGACATACATGTTATCGCC +TGGGCTTTCTAGAGAGGGCCTCCTCTACGAGTTGGAGAGTATATCAAGAAACGCACTCTC +AATATATCGAGCCATCGAGGAAGGGGCATCTAAACTGGGGCTCATTATAAAGAAGGAGGA +GACAATGTGCAGCTATGACTTTCTCATTTATGGAAAAACTCCCTTATTTCGAGGTAACAT +TCTGGTGCCTGAATCCAAAAGGTGGGCTAGAGTCTCCTGTATCTCTAACGACCAAATAGT +CAACCTCGCTAATATAATGTCAACAGTGTCTACCAATGCTTTGACTGTTGCTCAACACTC +TCAGTCTTTGATTAAACCGATGAGGGACTTTCTGCTTATGTCAGTGCAAGCCGTTTTTCA +TTACTTGCTATTTAGCCCCATTTTAAAAGGCAGAGTTTACAAAATACTGAGTGCTGATGG +GGATCATTTTCTTTTAGCTATGTCTAGAATAGTTTACCTTGACCCCTCATTGGGGGGTGT +GTCTGGAATGTCTCTTGGGAGGTTTCACATACGCCAGTTTTCAGACCCGGTCTCTGAGGG +GTTATCTTTTTGGAGAGAGATTTGGTTAAGTTCAAATGAGTCTTGGATCCATGCACTTTG +TCAAGAAGCAGGAAATCCGGATTTGGGAGAGAGAACACTGGAAAGCTTCACACGCCTTCT +CGAGGATCCCACTACCCTGAATATAAAAGGAGGGGCCAGTCCAACCATTCTTCTCAAGGA +TGCAATCAGAAAGGCCCTGTATGATGAAGTGGATAAGGTGGAGAACTCCGAGTTTAGAGA +AGCAATCCTCTTGTCCAAGACCCATAGGGACAACTTTATACTTTTCTTGAGATCTGTTGA +GCCTCTCTTTCCTAGATTTCTCAGTGAACTCTTCAGCTCTTCCTTCCTGGGGATTCCTGA +ATCGATTATTGGGCTGATCCAAAATTCTAGAACGATAAGAAGACAGTTTAGGAAGAACCT +CTCTAGAACCTTAGAAGAGTCTTTCTATAACTCAGAGATTCATGGGATCAATCGGATGAC +ACAGACTCCCCAGAGAATAGGAAGAGTTTGGGCCTGCTCTTCTGAGAGGGCAGATCTCCT +AAGAGAAATCTCGTGGGGGAGAAAGGTGGTTGGTACAACCGTCCCTCACCCCTCTGAGAT +GTTGGGGTTGCTTCCTAAATCTTCAATCTCCTGTACTTGTGGCGCAACAGGAGGGGGAAA +TCCTCGAATATCAGTGTCTGTACTCCCGTCCTTTGATCAGTCGTTCTTTTCCAGAGGCCC +TCTGAAGGGATACCTGGGCTCATCCACATCTATGTCAACCCAGCTGTTCCATGCTTGGGA +GAAGGTCACCAATGTTCATGTGGTGAAAAGGGCCCTTTCACTCAAAGAGTCCATAAATTG +GTTCGTCACAAGAAACTCCAATTTGGCTCAAACTTTAATCAGGAATATAATGTCTCTGAC +GGGACCAGACTTCCCGCTGGAAGAGGCCCCCGTCTTTAAGAGAACAGGATCAGCTCTGCA +CAGGTTCAAGTCAGCTAGGTACAGTGAAGGGGGTTATTCCTCTGTCTGTCCCAATCTCCT +CTCTCATATTTCCGTCAGCACAGACACGATGTCTGACCTGACTCAAGATGGGAAGAACTA +TGACTTTATGTTCCAGCCGCTGATGCTTTACGCACAGACATGGACATCAGAACTGGTGCA +GAAGGACATACGGCTGAGAGACTCCACCTTTCACTGGCATCTTCGATGCAATAAGTGTAT +AAGGTCCATCGATGACATCACTCTAGAGACTTCCCAGATCTTTGAATTCCCGGATGTTTC +GAAAAGGATATCTAGGATGGTCTCTGGAGCAGTGCCTCACTTTCAAAAACTTCCTGATAT +TCGTCTAAGACCTGGAGACTTTGAGTCTCTAAGTGACAAAGAGAAGTCACGCCACATAGG +GTCGGCTCAGGGGCTCTTGTATTCGATCCTGGTTGCGATCCACGACTCTGGGTACAACGA +TGGAACCATTTTTCCTGTCAACATATATAGCAAGGTCTCCCCGAGAGACTATTTAAGAGG +TCTCGCCAGAGGAATCTTAATAGGGTCTTCTATCTGCTTTCTAACAAGAATGACAAACAT +CAACATCAACAGACCCCTTGAACTAATCTCAGGAGTGATCTCATACATCCTCCTTAGATT +AGACAATCACCCATCCTTATACATCATGCTCAGAGAACCGTCTCTGAGAGGAGAAATATT +TTCTATTCCCCAAAAGATCCCCGCCGCTTACCCAACCACAATGAAAGAGGGCAACAGATC +TATCCTGTGTTACCTCCAGCATGTGCTCCGCTATGAGCGGGAGGTAATTACAGCATCCCC +GGAGAATGATTGGTTATGGATCTTCTCTGACTTCAGAAGTTCCAAAATGACCTATTTAAC +CCTTATCACCTATCAGTCTCATCTTTTACTTCAGAGGGTTGAGAAGAATCTCTCTAAAAA +TATGAGGGCCAACCTACGACAGATGAGCTCTCTCATGAGACAAGTATTGGGCGGGCATGG +CGAAGACACCTTAGAATCAGACGAGGACATTCAGAGGTTGTTGAAAGACTCATTACGTAG +GACAAGATGGGTAGATCAAGAGGTGCGTCATGCAGCCAGAGCCATGACAGGGGGTTATAG +CCCCAATAAGAAGATGTCTCGCAAAGCGGGGTGTTCAGAATGGGTCTGCTCTGCCCAACA +GGTTGCTGTTTCAACCTCAGCAAATCCAGCCCCTGTCTCTGAGCTGGACATCAGGGCTCT +CTCCAAAAGACTTCAAAACCCGTTGATTTCAGGTCTGAGAGTTGTTCAGTGGGCAACAGG +CGCTCATTACAAACTCAAACCTATTCTTGATGATCTCAATGTGTTCCCATCCTTATGTCT +TGTGGTCGGAGACGGGTCAGGGGGGATCTCAAGGGCTGTACTCAATATGTTTCCGGACGC +TAGGCTCGTGTTCAACAGCCTGTTGGAAGTGAATGACCTGATGGCTTCAGGGACACATCC +GTTGCCTCCTTCAGCAATCATGAGCGGAGGGGATGATATCATATCTAGGGTGATTGACTT +TGACTCCATCTGGGAAAAGCCTTCTGACTTAAGGAACTTGACGACATGGAGGTACTTCCA +GTCGGTTCAAGAACAAGTGAATATGTCCTATGATCTTATTATTTGTGACGCAGAGGTCAC +TGACATCGCATCAATTAATCGAATAACTCTACTAATGTCTGATTTTGCATTATCAATAGA +CGGCCCACTTTATTTAGTTTTCAAAACTTATGGAACCATGCTCGTGAATCCTGATTACAG +AGCCGTTCAACATCTGTCCAGAGCATTTCCCACAGTCACAGGATTCATAACCCAGATGAC +ATCATCCTTCTCGTCTGAGCTATACCTCAGATTCTCTAAAAGGGGAAAATTTTTCCGAGA +TGCAGAATACTTGACTTCTTCTACTATTAGGGAGATGAGCCTTGTATTGTTCAACTGCAG +CAGCCCAAAAAGTGAGATGCAGAGGGCCCGCTCTCTGAATTACCAGGACCTTGTAAGAGG +ATTTCCTGAGGAGATCATATCCAACCCATATAATGAGATGATCATAACTCTGATTGACAG +TGATGTGGAATCTTTCCTGGTTCATAAGATGGTTGATGACCTAGAGTTGCAGCGAGGAAC +TTTGTCTAAAGTCTCCATTATCGTAGCCATCATGATAGTCTTTTCCAACAGGGTGTTTAA +TGTCTCGAAACCGTTGACTGACCCTTTATTCAATCCGCCATCTGACCCCAAAATCTTGAG +ACACTTCAATATTTGCTGCAGTACCATGATGTACTTGTCTACTGCTCTGGGGGATGTTCC +AAGCTTTGCTAGACTTCATGACCTGTATAACAGACCAATAACCTACTATTTCGGGAAGAA +AGTTATCCGAGGGAACATTTATTTATCCTGGAGTTGGTCTGACGACACTTCAGTGTTCAA +AAGGGTGGCTTGCAATTCTAGTTTGAGCCTCTCGGCTCACTGGATAAGGCTGATTTACAA +AATAGTGAAGACTACCAGACTTGTGGGAAATACGGAGAATCTATCCAGGGAGGTCGAAAA +GCACCTTCGAGGGTACAACAGGTGGATTACCCTAGACGACATAAAATCCAGATCATCTCT +TCTAGATTACAGCTGCCTATAAAACAGGGCATTTGGGGAGAAATACATGGACCAACACCT +TGAGACAGTATGCCCTGAAAAAAACAAGACCCTGATTCATAACCTCTATTTGCTTGATGG +TTTTTTTTGTCTTTGTTGTTTTTTTGTTAA diff --git a/test/input/TestSnpEff/ann_eff.txt.gz b/test/input/TestSnpEff/ann_eff.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..8781203efaac6507d0bd5aa7af5105a2706ba37a GIT binary patch literal 911 zcmV;A191EwiwFqqR99O9|6y)!Uu9-yE_8Tw0Hjr2Z{s!)eCGX-J}Cj0yUUN40IGrN zP^5>nXWXk`By=LcmVK7f!vDTQIZkqpZQyVM%%x~)Ih>tY-9K&2dNr6~wp=f=SHkZgvQ5Nm{1s z&1%~&azEZY=03l!=C1$r=ifeRa@J4^-u(0XH#&@_t{P*nt#e{Yn0=z4-UU|{g+oe? zBHr{4kCZjD?1mxtLvH__ZMqo_A)f1-#@y}(Qf(H`2En)#%$=$K_~L}3zos_`&K*OT zR`m!WMfMztSc45hToyH8cNPa6(F>>35Q69FO>MsBjZ>Y1Yj+uf`1m7)rfT*W7;i@6 zq%1M=3k_QK_l|^oiY0Xc(e>NB0n^=myUCB;syF?0N!(&cT|d+Y*WJ}$ra?7YQeymL z{ndEuBx%cJ`3)3Dce*shs9N$#g-L?5aRlnEpimyH5ZD5xCy7y_Qk#17NP(Kg&L9|d zVwwXn@jWq;to?wYY9@w|d{l|^F(#|zh&lN4Oq5L6x!TVj@}dSMNQA4oy4`caOxl9x zwC6;{rj{rq_XbFv(y>@3Wm6jyz5k6w1@MuAK(TG@{1%)?VR-@aK&v<>2cXlicrGYFdxFitX#Trjr>`K0&q2^NYt;-{|a=|MhtoE<<3 z^jdh4Gn@Z5?D|!|Tdtmm!&3Ee$q)_(Wi^GhX;tnTG=aOPtjhDDg6LHpMuTt(c0>m| z={y^Q1XRz84;)d7y~@yz6emEF49fd9Wlq3jt8!?JI68tU@e&duD@O)w&>c&Ie4!B5 z3xE?tD24RV{ThYpnOY34@Hj0*R%PjhM~WeW$Z`N|I357;@&TaEBQ-*fb@kdpEThp| z5stw^4W;RwKEX-2cn3%)RhdixvOq&NfV|tRFk=K$;$@J`A1qTGlVVhcVF9wawhI@{hl2o41+3ArPUk`~bU z_Uh?mXRx2_)RPCxG$@|Vb(TxDRK_4ve{NI~ZlABwpFA^Qm=tQEvZt#|RWLXDYF#`$ zpFbY$=TBdlUk@?T$XLd+EXZP&8|f@p*`t~pYv>6oGoLR$`t)q^GXO``f?H!22WQtb zJ`CdgL1npd{qGtEYOgR&ig;1&_3LL-a*LqmQYDMxp^LFam>!dh{fTOja*p7m{2w=i}vc8U@FR*20AZydY_%PeV%eg@i7A=fhgc%sw zSbl68v#l`+X3Ny*LEO`2ki>5fYxGw0?%P^4j8>A~7eSF%!b2PtH;c#IpqvpT37fN- zqDY2F1kqecI_F3bDb<`toLZb)rb%RoSu~RomCES+2A;}0MY{SUPV%tRPydyGbLV*{ z&XY&~F^v2|u|kOnF!HktDE&L^)`pO*zmZG-!*~IB;|~_%9qd$<#a!3&`kYn&YU%6c2H3Ji@sO zlA>)4D#8ITpZ8t8tDi!I51z{XIPn)S1o|f`3sq7)9}ragsrmb-IL5dA@4}Hl0x0+z zE6@a5Ggo1-{kd>@H`BY@#^mn4G3+;{*Mo*L@*1Cib4HE9aO7Q28yaq-W~RpU(-&;M z*&q7`wuISeH=KqGPiV*Eu3N6+yh7>kP?WcK{p+dYU6+%xG97bFeZr7g9*ruU!I&^C z%{DIASz0In5vFr+W{Eqv4BO14&XrrP+oNrcId_+v5>y>&u|o^$4zWa8HeAnk26lg9 zk4N_CQv=zkjR@?B8KtI$1viNxHX)SQxQT7tMost~aS>AC@&MT$m-9-vgh{6vH@Lms{L1m1a9okV(d$12v9`mzx`4IH zCL}F<`m+fl=zKQmgYuVCQF7h_xzF;O8@9CnXHV~yKFG1A-HfBcwn6E=H0s8(0 z?mgWPU+;=bK!HAqxQy8;+*XK(sar*=8W!s|KF`~tI`TQ4ZFef8-{#m zfGVD`5MvM>Zc%PxYHFTA@Sj6a2DOh-X;KW?3UqXtXe3-;Sq{BJ6@s?me7=1 zSv%q=>Asy$E)b4j0_rkiCXg!}bDJTA&u1mZi0K06!fncHo?fW_uC(M310Nnl=cZTD zO_iHi3JJ-nyLXvzkf2K(veSE&VCc>2^whzC{d6$CVapFCs9;DUCg2h!5R+Ac5^iCO z6B9844P>q@F?yjCV)zKntU#?!^Rynm3b%rx7R0Kz^9>v^9VSrjTyv11i|)hJdmR?h zoAs(NO|n$)2hF!^`JpgL1_wz9ID;nQ)qMrQ9om$HSd3FBeS+D7@E4;*X#B;9v`NOre0P6>l+K9bnN*cNc%zeFPfle)T{P{?b5Gby5n zxYy;8q!hHeJ`;jg;U00cOXC}go$lNjWK@8l5pHfm%5)u-ak|cPUP6f;o?%2z8R>w!Tm*>Ewc}@bY uA5Iff;NAhyVciB^-zV literal 0 HcmV?d00001 diff --git a/test/input/TestSnpEff/merged.vcf.gz b/test/input/TestSnpEff/merged.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..51f9a9fa946cadd90bb468553a7467167fc1d353 GIT binary patch literal 768 zcmV+b1ONOViwFb&00000{{{d;LjnNf0*#YzZ<|OE#$Rtf1*+d%CF0KP&i++>60w8N zCpSr5+V?nJCrGZHViH}x{SEk{OMEsxU_oM;-#qioFc_?=bzE)hA7wN8HqM`Dh-ZUA ze!cjV6{C-{$!J=nSC2oIu^wSsJU_(Im!_^Zcj+YVZtLoyskWQZyJ_5P8{T~P``h3w ze+ka^!TEYyHg99I^*XL&lGpL)W8B<6A2KF42LfiBCf>#REoBkQ@5v-SH1~(`SM!(n zMNjniS&?Q$3Er}#a2;GMFQc?h3h9KeqhIPZYZ$+9YsGLzr(YIh&& zU2^$XdD|p6=TGJOF~09|-EKEcb=T$k^Rscwyy&DOs(iSYC3JA)d0FrSnMofE&N^0#;d9BDf#S*jW0Sw%QpHfm%5)u-ak|cPUP6f;o?%2z8R>w!Tm*>Ewc}@bY sQU+6b{151?_Y^q$;!zFH3WiX9pb^T2*BV=a=E$QND9ykOHXlR)0DW^C1poj5 literal 0 HcmV?d00001 diff --git a/test/input/TestSnpEff/msa.fasta b/test/input/TestSnpEff/msa.fasta new file mode 100644 index 000000000..eedd6deb8 --- /dev/null +++ b/test/input/TestSnpEff/msa.fasta @@ -0,0 +1,400 @@ +>JQ685920 +ACGCTTAACAACAAAATCAGAGAAGAAGTAGACAGTGTCGTCTACAAAGCAAGAATGTAA +CACCCCTACAATGGATGCCGACAAGATTGTGTTTAAAGTCAATAATCAGGTGGTCTCTCT +GAAGCCTGAAATTATAATAGATCAATATGAATACAAGTACCCGGCTATCAAAGATTTGAA +AAAGCCCAGTATAACCTTAGGAAAAGCCCCTGACTTGAACAAAGCATACAAGTCAATTCT +GTCCGGCATGAATGCAGCCAAGCTTGACCCTGATGATGTATGCTCTTATCTAGCAGCCGC +AATGCAGTTCTTTGAGGGGACATGTCCTGATGACTGGACCAGCTATGGAATCCTGATTGC +ACGGAAGGGAGACAAGATTACTCCAAATTCTCTTGTGGACATAAAACGTACTAATGTGGA +AGGGAACTGGGCTTTGACAGGGGGTATGGAGTTGACGAGAGACCCCACAGTTTCGGAGCA +TGCATCCCTAGTTGGTCTTCTCTTGAGTCTTTACAGGTTAAGCAAAATATCTGGACAAAA +CACCGGCAATTACAAAACGAACATCGCAGACAGAATAGAGCAGATTTTCGAGACGGCCCC +CTTTGTAAAGATCGTAGAGCATCATACCTTGATGACAACCCACAAAATGTGTGCTAACTG +GAGTACTATACCGAACTTCAGATTTCTGGCCGGAACCTACGATATGTTTTTCTCGCGGGT +TGAACATCTGTATTCAGCAATTAGAGTGGGTACAGTTGTAACAGCCTATGAGGACTGCTC +AGGGTTGGTGTCGTTTACAGGGTTTATTAAGCAAATAAATCTCACTGCAAAAGAAGCAAT +ACTATATTTCTTCCACAAGAACTTCGAGGAAGAGATAAGAAGAATGTTCGAGCCGGGGCA +AGAGACGGCAGTTCCTCACTCCTATTTCATACATTTTCGTTCTTTGGGCCTGAGTGGGAA +ATCTCCGTATTCATCAAATGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATG +TTATATGGGTCAAGTGAGATCTTTGAACGCAACGGTTATTGCTACATGTGCCCCGCATGA +GATGTCTGTTCTTGGGGGTTATTTGGGGGAGGAGTTTTTCGGAAAAGGGACTTTTGAGAG +AAGATTCTTTAGGGATGAGAAAGAACTTCAGGAATATGAGGCAGCTGAAGCAACGAAGAC +TGAAATAGCCTTGGCGGATGACGGAACAGTCAATTCTGATGATGAGGACTACTTCTCTGG +TGAAACTAGGAGCCCGGAGGCAGTTTATACTCGAATCATGATGAATGGGGGTAGATTAAA +AAGATCACACATAAGGAGATATGTCTCAGTCAGTTCCAATCATCAAGCTCGCCCTAATTC +GTTCGCTGAGTTTCTAAGTAAGACATACTCTAGTGATTCATAAAGAATTGAACAACAGGA +TTGTAAACATTAACAAATTGTGTACATCCTTCACGAAAAAAACTAACACCCCTCCTCTTG +AACCATCTCAGACATGAGCAAGATTTTTGTTAACCCAAGTGCAATCAGGGCAGGCTTGGC +TGATCTGGAGATGGCAGAGGAAACTGTAGATCTAGTTGCCAAGAACATCGAAGATAATCA +AGCTCATCTCCAAGGAGAACCCATAGAGGTGGACAGTCTTCCTGAAGACATGAGACGGCT +TCAGTTAGACGATGAAAAACCATCTGGCCTCGGTGTGATTGCCAAAGCAGGGGAGAGCAA +ATGTCAGGAAGACTTTCAGATGGACGAGGGGGAGGACCCCGCCCTCTTGTTCCAGTCGTA +CCTAGACAATATTGGAGTTCAAATGGTCAGACAAATGAGATCAGGAGAGAGGTTCCTCAA +GATATGGTCTCAGACTGTTGAGGAGATCATATCCTATGTCACGGTCAATTTCCCCAGTCT +GCCAAGGAAAGCCTCAGAAGACAAGGCTACCCAGACTGCCAACCAGGAACTCAAGAAAAA +GACAATGTCTGTTTCTTCTCAGCGAGAAAGTAAATCATCTAAAGCTAAAATGGCGGCCCA +AACCGCCTCCGGTCCTCCTGCTCTAGAATGGTCCGCCACAAACGAGGAGGATGACCTATC +TGTGGAGGCTGAAATTGCTCACCAGATTGCTGAGAGCTTCTCTAAGAAGTACAAATTCCC +CTCTCGATCATCAGGGATATTCTTGTATAATTTTGAGCAGTTGAAGATGAACCTTGATGA +CATCGTTAAGGAGTCGAAAAATGTGCCAAGCGTAACCCGCTTAGCCCATGACGGATCCAA +ACTCCCTCTGAGGTGTGTGCTGGGGTGGGTTGCTCTAGCCAACTCCAAAAAGTTCCAGCT +GTTAGTTGAGCCTGACAAGCTAAACAAAATAATGCAAGACGACCTGAATCGTTATGTGTT +CTGCTGACCAAACCCTCAAACTCAGTCGTGCTATGCGATCAAATCCAGCCTGCTCCAAAC +CCAACGTGAAAAAAACAGGCAACACCACTGATAAAATGAACTTTCTACGCAAGATAGTAA +AGAACTGTAGAGATGAGGACACTCAGAAGCCCTCTTTCGTGTCGGCTCCTCCAGATGATG +ATGACTTGTGGTTACCCCCTCCGGAATATGTTCCATTGAAGGAACTCTCGGGTAAGAAAA +ACATGAGAAACTTTTGTATAAATGGAGAGGTCAAAGTGTGTAGTCCGAATGGCTATTCAT +TTAGGATCCTGAGGCACATTTTGAAATCATTCGATGAGATCTATTCTGGAAATCAAAGAA +TGATAGGGTTAGTTAAAGTTGTTGTTGGACTTGCGTTGTCAGGAGCCCCAGTCCCGGAGG +GCATGAACTGGGTATACAAATTGAGGAGAACTCTTATCTTCCAGTGGGCTGATTCTAGGG +GCCCTCTAGAGGGGGAGGAGTTAGAATACTCTCAGGAAATTACCTGGGATGACGATGCCG +AATTTGTCGGATTGGAAATACGAGTGAGTGCAAGACAGTGCCATATCCAGGGCAGGATTT +GGTGTATCAACATGAACTCTAGGGCATGTCAACTATGGTCTGACATGTCCCTTCAAACAC +AAAGGTCTGAGGAGGATAAAGACTCTTCAATGCTTCTGGAATAGTCAATTTACATCCTAC +AAATTCCTCAATTGTTTACCTCTGGAGGAGAGAGCACATGGACTTAACTCCAACCTTCAG +GAGCAATAGAACAAAAACATGTTATGGTGCCGTTGAATTGCTGCATTTTATCATAGTCAA +ATCAATTATCTTTACATTTTAAGCCTCTCGGATGTGAAAAAAACTATCAACATCCCTCAA +AAGACTTAAGGAAACATGATCCCTCAGGCTCTTCTGTTTGTGCCTTTTCTATTTCCCTCG +TTGTGTCTCGGGAAATTCCCCATCTACACCATACCGGAAAAGCTCGGCCCTTGGAGTCCC +ATCGACATACATCATCTCAGCTGTCCTAACAATTTGGTTGTGGAGGACGAGGGTTGCGAC +AGTCTGTCAGGGTTTTCTTACATGGAACTGAAGGTGGGTTACATCTCTGCCATAAAGGTG +AATGGGTTCACTTGTACCGGTGTCGTGACAGAAGCTGAGACCTACACCAACTTTGTTGGT +TATGTCACCACCACGTTCAAGAGAAAACACTTCCGCCCTATGCCAGATGCATGCAGAGCT +GCGTACAACTGGAAGACAGCCGGTGACCCTAGATATGAGGAGTCTCTTCACAATCCTTAT +CCTGATTACCATTGGCTACGGACCGTGAAAACCACCAAAGAATCTCTTGTTATCATATCG +CCGAGTGTGGCTGATTTGGACCCATATGACAAATCCCTTCATTCAAGAGTTTTCCCTGGT +GGGAAATGTTTGGGGATAACAATCTCTTCCACCTACTGCTCAACTAACCATGACTATACT +ATCTGGATGCCCGAAGAAGCAAGACTCGGGACATCTTGTGACATTTTTACCAACAGCAAA +GGGAAGAGGGCATCTAAGGGGGGTAGGACTTGCGGATTCGTGGATGAAAGGGGCTTATAT +AAGTCTCTAAAAGGGGCATGTAAACTTAAGCTGTGCGGAGTTCCTGGACTTAGACTTATG +GATGGAACGTGGGTCGCTATTCAGACACCAGGTGAGACCAAATGGTGCTCTCCTGATCAG +CTGGTAAATCTACATGACTTTCGTTCAGATGAGATAGAACATCTCGTCGTGGAGGAGTTG +ATCAAGAAGAGAGAAGAATGTCTAGATGCACTAGAGTCCATCATGACCACCAAATCAGTA +AGCTTCAGACGTCTCAGCAACTTGAGAAAACTTGTCCCTGGGTTTGGAAAGGCATACACT +ATATTCAACAAAACCTTGATGGAGGCTGATGCTCACTACAAGTCAGTTCGGACTTGGGAT +GAAATCATTCCCTCAAAGGGGTGCCTAAGAGTCGGAGGGAGGTGTCATCCTCATGTAAAC +GGAGTGTTTTTCAATGGTATAATTCTGGGTCCGGATGGACATGTCCTGATTCCAGAGATG +CAATCGTCCCTCCTCCAACAGCATATGGAGCTGTTGGAATCCTCTGTAATCCCCCTAATA +CATCCCTTGGCCGACCCATCAACAGTCTTCAAAGACGGTGATGAAGCGGAGGACTTTGTT +GAGGTTCACCTTCCGGACGTTCACAAGCAGGTCTCAGGGGTCGATCTTGGTCTCCCAAAC +TGGGGGAAATATGTGCTGATGAGTGCAGGTGTTCTAGCGACCGTGATACTGACAATCTTC +TTGTTAACATGTTGCAGAAGGGTTAACAGAACAAAACCAAAACAACAAAGTCTTGGGGAG +TCAGGAAGGAAAGTATCGGTTACTCCTCAAAATGGGAAGGTCATGTCTTCATGGGAGTAT +TACAAGAGTGAGGGCAGGACCAGTCTGTGAGTGCTGGCCATCTCCTCCATATCTTGCGTT +CAGAAGATCACCTCTCTTCTAGATCTGGGGGAATCTCTTGTTTTGACAGTCCTTTGTGGA +CTCCGTGCTACAAGGCAAAATTCGAGAGTCAAGAAACTTTCATTAATCATCCCAACTGAT +CAGACACAGTTACGTAGGTTCTGATAATGTATGACGTCTTCTGACAGTGTCAGTGACCAA +TGGTGCTCTCATCCCCCATGGACTGATACCAAAGGTTGTGGACAAACCAACCGATATCTC +AGATAATTCTAGGCTTGAGCCGGGGCAGGGACCGTGGCTAGTCCCCCTACACTAGACT-A +AATAATGGTTAGCTGAGGGAAGCGATTTGCCTCCTATGAAGGACATAAGCAATAGATCAC +AATCATCTTACATCCCGATAAGGTGTGCTTAACTACAAAGGGCTGGGCCATCCAAGCTTT +TCAGCCAAGAAAAAAACTGTGGAATGGAGGAGTAATTAACAACACTTCTCATCCTGAGAA +CTGCACCATGATGCTTGATCCAGGGGAGGTTTATGATGACCCCGTTGATCCGATTGAGTC +AGAGGCCGAGCCGAGAGGGAACCCAACCATTCCCAACATCTTAAGAAACTCTGACTACAA +TCTCAACTCTCCTCTAATAGAGGATCCAGCCAAACTAATGTTAGAATGGTTGAAGACAGG +AAATAGGCCTCTCCGGATAACTTTAACAGACAATTGCTCTAGGTCTTACAAAATTTTGAA +GGATTATTTCAAGAAAGTGGATATAGGATCTATCAAAGTGGGCGGGGCTGCAGCACAATC +TATGATCTCCCTTTGGTTGCACGGTGCCCACTCTGAATCAAATAGGAGCCGGAAGTGTAT +AACCGACTTGGCTCAGTTCTATTCCAAGTCTTCCCCCATAGAAAAGCTGTTAAATTACAC +ACTCGGAAATCGAGGGCTGAGGATCCCCCCAGAGGGGGTCCTAAGTTGCCTTGAGAGGGT +CGATTACGATAAAGCATTTGGGAGGTATCTGGTTAACATATACTCCTCTTACTTATTCTT +TCACGTGATCACCCTTTACATGAACGCCTTGGACTGGGATGAGGAGAAGACCATTCTAGC +ACTGTGGAGGGATTTAACCTCAATAGATATAGGAAAGGACTTGGTCAAGTTTAAAGATCA +AATATGGGGATTGCTAATTGTGACCAAGGATTTTGTGTACTCACAAAGTTCTAACTGCCT +TTTTGATAGAAACTACACGCTTATGCTTAAAGACCTTTTTTTGTCTCGGTTCAACTCTCT +AATGATTCTCCTTTCTCCTCCGGAACCCAGATATTCAGACGACCTGATATCCCAGCTGTG +TCAGCTATATATCGCTGGAGATCATGTCTTGTCTATGTGCGGGAACTCTGGTTATGAGGT +CATTAAAATATTGGAGCCGTACGTTGTGAACAGTTTAGTCCAGAGGGCAGAAAAGTTTAG +GCCTCTCATTCATTCCCTAGGGGACTTCCCTGTATTTATAAGAGATAAGGTAGGTCAGCT +TGAAGGAACATTTGGTCCCAGTGCAAAAAGGTTCTTCAGGGTTCTGGATCAATTCGACAA +TATACACGACTTAGTCTTTGTATACGGCTGTTATAGGCATTGGGGGCATCCTTACATAGA +TTATAGAAAGGGCTTATCGAAGCTATATGATCAAGTCCACATCAAGAAGGTGATAGATAA +GACTTACCAGGAGTGTTTGGCCAGCGACCTGGCCAAAAGGATCCTCAGGTGGGGATTTGA +CAAGTATTCCAAATGGTATATTGATTCAAGACTCCTCTCAAAGGACCACCCCCTAACTCC +TTATATCAAAACCCAGACGTGGCCTCCAAAACATGTGGTAGATTTGGTGGGTGACACTTG +GCATAAGCTCCCGATCACCCAGATCTTCGAGATCCCCGAATCAATGGACCCATCTGAGAT +ACTAGATGATAAATCACACTCTTTTACTAGAACTAGACTAGCGTCCTGGCTATCAGAGAA +CAGAGGAGGGCCGGTCCCCAGCGAGAAGGTCATTATCACTGCTCTTTCCAAGCCTCCTGT +CAACCCCAGGGAATTTCTGAAATCTATAGACCTGGGAGGATTGCCGGACGAGGATTTGAT +AATCGGCCTCAAGCCTAAGGAAAGAGAGTTGAAGATAGAAGGTCGGTTTTTTGCCTTGAT +GTCCTGGAATCTAAGGCTGTATTTTGTCATCACAGAAAAGCTCCTAGCCAATTATATCTT +GCCACTTTTTGACGCACTGACTATGACAGACAACTTGAACAAAGTGTTTAAAAAGCTGAT +CGACAGAGTCACCGGACAGGGGCTTTTAGACTACTCCAGAGTTACATACGCTTTTCACCT +GGACTATGAAAAGTGGAACAATCATCAGAGGCTGGAGTCGACAGAGGATGTATTTTCTGT +ACTTGATCAAGTATTCGGATTAAAGAGGGTGTTTTCCAGAACTCATGAGTTTTTTCAGAA +GTCTTGGATCTATTACTCAGATAGATCCGACCTCATCGGGTTATGGGAAGATCAAATATA +CTGTTTGGACATGTCAAACGGCCCGACATGCTGGAACGGCCAGGATGGCGGGCTAGAGGG +TTTGCGACAGAAAGGCTGGAGTCTGGTTAGCCTATTAATGATAGATCGTGAATCTCAAAC +CAGGAACACAAGAACTAAAATACTAGCTCAGGGAGACAACCAAGTTCTGTGTCCGACATA +TATGCTGTCGCCGGGGCTCTCTCGAGAGGGGCTTCTCTACGAGTTGGAGAGCATATCAAG +AAACGCTCTCTCGATATATCGTGCCATCGAGGAAGGGGCATCCAAACTGGGGCTCATCAT +AAAGAAGGAAGAGACCATGTGTAGCTATGACTTTCTCATCTATGGGAAAACTCCTTTATT +TCGAGGTAACATCTTGGTGCCTGAGTCCAAAAGATGGGCTAGGGTCTCCTGTATCTCCAA +CGACCAGATAGTCAACCTTGCCAATATAATGTCAACTGTATCCACTAATGCATTGACTGT +CGCCCAACACTCTCAGTCTTTGATTAAACCGATGAGGGACTTCCTGCTTATGTCAGTACA +AGCTGTCTTCCATTACTTGCTGTTTAGCCCCATCTTGAAAGGCAGAGTTTACAAGATCCT +GGGTGCCGAGGGGGACAATTTTCTTCTAGCTATGTCTAGGATAATTTATTTAGACCCCTC +ATTGGGGGGAGTATCTGGAATGTCTCTTGGAAGGTTCCACATACGTCAGTTCTCAGATCC +TGTCTCAGAAGGGTTGTCCTTCTGGAGAGAGATATGGCTGAGCTCCAGTGAGTCTTGGGT +TCATGCACTGTGTCAAGAGGCAGGAAACCCAGATCTGGGAGAGAGAACACTGGAAAGCTT +CACTCGCCTGCTCGAGGATCCTACTACCCTAAACATTAAAGGAGGGGCCAGTCCTACCAT +TCTTCTCAAAGATGCGATCAGAAAAGCCTTGTATGATGAGGTAGACAAGGTGGAGAACTC +AGAGTTTAGGGAAGCGATCCTCTTGTCTAAGACTCATAGGGATAATTTCATACTCTTCTT +GAAATCTGTTGAGCCTTTGTTCCCCCGATTTCTCAGTGAGCTCTTCAGTTCATCATTCCT +GGGGATTCCCGAATCAATCATTGGGTTGATACAAAACTCCAGAACAATAAGAAGACAGTT +TAGGAGGAGTCTCTCAAGAACCTTGGAAGAGTCCTTTTATAACTCAGAGATCCATGGGAT +CAACCGGATGACCCAGACTCCCCAGAGGGTCGGGAGAGTTTGGCCCTGCTCTTCTGAGAG +GGCAGACCTCCTAAGAGAAATCTCATGGGGAAGGAAGGTGGTCGGCACAACAGTTCCTCA +CCCCTCTGAGATGTTGGGGTTGATTCCCAAATCCTCTATTTCCTGTACTTGTGGAGTAAC +AGGAGGGGGAAACCCTCGAATATCAGTGTCTGTGCTTCCATCTTTTGATCAGTCATTTTT +TTCGAGAGGCTCTCTGAAAGGATATCTGGGCTCATCCACTTCCATGTCGACCCAGCTATT +CCATGCCTGGGAGAAAGTCACCAATGTTCATGTGGTGAAAAGAGCTCTGTCACTCAAGGA +ATCTATAAACTGGTTCATTACAAGAGACTCAAATTTGGCCCAAACTTTGATTAGGAACAT +AATGTCTCTGACAGGCCCAGATTTTCCTCTGGAAGAGGCCCCTGTCTTCAAGAGGACAGG +GTCTGCCCTGCATAGGTTCAAGTCGGCTAGATACAGTGAAGGGGGGTACTCTTCAGTTTG +TCCAAATCTCCTCTCTCACATCTCTGTCAGTACAGACACGATGTCTGATTTGACTCATGA +TGGGATAAATTATGACTTTATGTTTCAGCCGTTGATGCTTTATGCGCAAACATGGACATC +AGAACTGGTGCAAAAAGATACACGGCTGAAAGATTCTACCTTTCACTGGCATCTTCGATG +TAACAAGTGCATAAGGCCCATCGATGATATCACCCTTGATACCTCTCAGATCTTCGAGTT +CCCAGATGTCTCAAGAAGGATATCTAGGATGGTTTCTGGAGCTGTGCCTCATTTCAGAAA +GCTTCCTGATATTCGTTTGAGACCAGGTGACTTCGAATCTCTAAGTGGTAAAGAGAAGTC +TCGCCACATAGGATCGGCCCAAGGACTCTTGTATTCAATCTTGGTTGCGATCCATGACTC +TGGGTATAATGATGGAACCATTTTCCCTGTCAACATATATAGCAAGGTCTCCCCTAGAGA +CTATTTGAGAGGGCTTGCGAGAGGAGTCTTGATAGGGTCCTCGATTTGCTTCTTGACAAG +AATGACAAACATTAACATCAATAGACCTCTTGAACTGATCTCAGGAGTGATATCATACAT +CCTCTTGAGACTGGACAATCACCCATCCTTGTATATAATGCTCAGAGAGCCATCCCTTAG +AGGAGAGATATTTTCTATTCCCCAAAAGGTCCCCGCTGCTTACCCGACCACGATGAAAGA +AGGTAACAGATCTGTTCTGTGCTACCTCCAACATGTGTTGCGCTACGAACGAGAGGTGAT +CACTGCATCTCCGGAGAATGACTGGTTGTGGATCTTCTCGGACTTTAGAAGCTCTAAGAT +GACATATCTGACTCTCATCACCTACCAGTCTCATCTGTTACTTCAGAAGGTTGAGAAGAA +TCTCTCCAAGAGTATGAGAGCCAACCTACGGCAAATGAGCTCTCTGATGAGGCAGGTGCT +GGGTGGCCATGGTGAAGACACCTTAGAGTCAGACGAAGACATCCAGAGGCTGTTAAGAGA +CTCTCTGCGTAGGACAAGGTGGGTAGACCAGGAAGTGCGCCATGCAGCGAGAACTATGAC +AGGAGTTTACAGCCCCACCAAGAAGATGTCACGCAAAGCCGGGTGTTCAGAATGGGTCTG +CTCTGCACAGCAGGTTGCGGTTTCGACCTCATCGAACCCAGCCCCTGTTTCAGAGCTGGA +CGTCAGAGCCCTCTCCAAAAGGCTTCAAAACCCGTTGATCTCTGGACTGAGAGTGGTTCA +GTGGGCGACAGGGGCCCATTATAAGCTCAAACCTATTCTGGATGATCTCAATGTCTTTCC +ATCTCTATGTCTTGTGGTCGGGGATGGGTCAGGGGGGATATCAAGAGCAGTACTTAACAT +GTTTCCTGATGCCAAACTCGTATTCAACAGCCTGTTAGAAGTAAATGACCTGATGGCATC +AGGAACACATCCGTTGCCCCCCTCAGCAATCATGAGTGGAGGAGATGACATCATATCCAG +GGTAATTGGCTTCGACTCCATCTGGGAGAAACCTTCTGACTTGAGGAACTTGACCACATG +GAGGTACTTCCAGTCAGTCCAAGAACAAGTAAATATGTCATACGACCTTATCATTTGTGA +TGCAGAGGTCACTGACATTGCATCAATCAACCGAATAACCCTGCTAATGTCTGATTTTGC +ATTGTCCATAGACGGCCCACTTTATCTGGTTTTCAAAACTTACGGTACCATGCTTGTAAA +CCCTGACTACAGAGCAATTCAACATCTATCCAGAGCATTTCCTGCGGTCACTGGATTCAT +AACTCAGATGACCTCGTCCTTCTCATCCGAGCTATATCTCAGATTCTCAAAGCGGGGGAA +GTTCTTCCGGGATGCAGAGTATTTGACTTCTTCCACCCTTCGAGAAATGAGCCTTGTATT +GTTCAACTGTAGCAGCCCCAAGAGTGAGATGCAGAGAGCCCGCTCTCTAAATTACCAAGA +TCTTGTAAGAGGATTCCCAGATGAGATCATATCCAATCCTTACAGCGAAATGATCATAAC +TCTGATTGACAGTGATGTAGAATCTTTCCTGGTTCACAAAATGGTAGATGATCTAGAGTT +GCAGCGAGGAACTTTGTCTAAAGTTTCTATCATTATAGCCATCATGATAGTCTTCTCCAA +TAGGGTGTTCAATGTCTCAAAACCACTGACTGACCCTTTATTCTATCCGCCATCTGATCC +CAAGATCTTGAGACACTTCAACATATGCTGCGGCACCATGATGTACTTGTCTACCGCCTT +AGGGGATGTGCCCAGCTTCTCGAGACTTCATGACCTGTACAACAGACCCATAACTTATTA +TTTTAGGAAGCAAGTCATCCGGGGGAGTGTTTACCTGTCCTGGAGTTGGTCTGATGACAC +TTCAGTGTTTAAAAGGGTGGCTTGCAACTCTAGCTTGAGTCTCTCATCTCACTGGATCAG +GTTGATTTACAAAATAGTGAAGACCACCAGACTTGTAGGAAGGGTCGAGGACCTGTCCAG +GGAGGTAGAGAGGCACCTTCGGGGGTACAACAGATGGATCACCCTCGATGACATTAGATC +CCGATCATCTCTGTTAGATTATAGCTGCTTGTAAGACTAAGCACTCTCGAGGGAATGTAC +AGACTAAGATTATGGGACGGTGTAACCTGAAAAAAACAAGATCCCGATTCATAACTTCTG +TTTACTTGATTG-TTTTTCCATCTTTATTGTTTTTTTGTTAAGCGT +>RBV16-1 +----------ACAAAATCAGAGAAGAAGTAGACAGTATCATCTACAAAAAGAAAATGTAA +CACCTCTACAATGGATACCGACAAAATTGTATTCAAAGTCAATAACCAGGTTGTCTCTCT +AAAACCTGAAATTATTGTAGATCAATATGAGTACAAATACCCGGCTATCAAAGACTTAAA +AAAGCCCAGTATCTCCCTGGGAAAAGCTCCTGATTTGAACAAGGCGTATAAGTCAATTTT +GTCCGGTATGAATGCAGCTAAGCTCGACCCTGACGATGTGTGCTCTTACTTGGCAGCTGC +AATGCAGTTCTTTGAAGGAACATGTCCAGAAGACTGGACTAGCTATGGAATCTTGATTGC +AAGGAAAGGAGACAAGATAACTCCAAACTCTCTCGTAGACATAAAACGTACAGATGTAGA +AGGGAACTGGGCTCTAACAGGAGGAATGGAGTTGACTAGGGATCCCACCATTCCAGAACA +TGCATCTTTGGTTGGTCTTCTCCTGAGTTTGTATCGATTGAGCAAAATATCCGGACAGAA +CACAGGCAATTATAAGACAAATATCTCTGATCGTATAGAGCAGATTTTTGAGACGGCCCC +CTTTGTGAAGATCGTGGAACATCACACTTTGATGACAACTCACAAAATGTGCGCTAACTG +GAGCACCATACCGAACTTTAGATTCCTAGCCGGAACTTATGACATGTTTTTCTCTCGGAT +TGAACATCTATATTCAGCAATCAGAGTGGGTACAGTTGTCACTGCTTACGAGGACTGCTC +AGGGCTAGTATCCTTTACAGGTTTTATAAAGCAGATAAACCTTACAGCAAAGGAAGCAAT +ACTTTATTTCTTCCACAAAAATTTTGAGGGAGAGATAAGAAGAATGTTTGAGCCGGGACA +GGAAACTGCAGTCCCTCACTCCTATTTCATCCATTTCCGGTCTTTGGGCCTTAGTGGGAA +ATCTCCATATTCGTCAAGTGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATG +CTATATGGGTCAAGTGAGGTCTTTGAATGCAACGGTTATTGCCACATGTGCTCCACATGA +GATGTCTGTTCTCGGGGGTTATTTGGGGGAGGAGTTCTTTGGCAAGGGGACTTTTGAGAG +AAGATTCTTCAGAAACGAGAAGGAACTTCAGGACTATGAAGCAGCAGAGTTGACAAAGAC +TGAGGTCGCTCTGGCAGACGACGGAACAGTCAATTCTGACGATGAAGACTACTTCTCTGG +TGAAACCAGGAGTCCAGAGGCGGTCTATACTCGGATCATGATCAATGGGGGCCGACTCAA +AAGATCACATATAAGAAGGTATGTATCAGTCAGTTCCAATCATCAAGCTCGCCCCAATTC +ATTTGCTGAATTTCTAAACAAGACATATTCTAACGACCCGTAGGGAGTCGAACTTCAAGA +TTGTCAACAATAATAAATTGTTTAATTCCTCCACGAAAAAAACTAACACCCCTCCTTTTG +AACCATCCCAAGCATGAGCAAGATTTTTGTCAACCCAAGTGCGATCCGGGCCGGCCTTGC +TGACCTAGAGATGGCAGAGGAGACTGTAGATCTGATTGCCAGGAACATCGAGGACAATCA +GGCTCATCTCCAAGGAGAGCCTATAGAAGTAGATAGTCTGCCCGATGACATGAGACGGCT +CCACTTGGATAGTGAAAAACCGTCTGGTTTTGACAAGGTGACAAAAGAAGGGGAGAGCAA +GTGTCATGAAGACTTTCAGATGGATGAAGGGGAGGACCCCAGCCTCTTGTTCCAGTCATA +CCTGGATAATGTCGGAGTTCAAATAGTCAGGCAAATGAGGTCAGGAGAGAGATTCCTTAA +AATATGGTCTCAGACTGTGGAGGAGATCATATCCTATGTCATGATCAATTTCCCAGGCTC +TTTGGGGAGGCCTTCTGAAGACAAGGCCACTCAAACTGCCAATCGGGAACCCAAGAAAGG +AGTGGCATCAGTTTCGTCTCAACTCGAAGGTCAATCATCCAAAGCGAGAGCGGCAGCCCA +AACTGCCTCCGGTCCACCTGCCCTAGAATGGTCTGCCACCAACGAAGAAGACGATTTGTC +TGTGGAGGCAGAGATAGCTCATCAAATTGCCGAGAGCTTCTCCAAGAAGTACAAATTCCC +CTCTCGATCATCAGGGATATTCTTGTATAATTTTGAGCAGTTAAAGATGAACCTTGATGA +CATAGTCAAGGAATCAAAGAATGTGCCTGGTGTTACCCGCTTGGCCCATGAAGGGTCTAA +ACTCCCTCTAAGATGTGTACTTGGGTGGGTTGCTCAAGCCAACTCCAGAAAGTTCCAGCT +GCTGGTCGAGCCTGACAAGCTAAATAAAATAATGCAAGACGACCTAAACCGTTATTTGTC +TCGTCAACTAAATTTGTAGCCTCAGTCCCATTATGCAATCAAAACCAGTCTGATCCAAAG +TCAACGTGAAAAAAACAGGCAACACCACTAATACAATGAATTTCCTACGCAAGATAGTGA +AGAACTGTAGAGATGAGGACGATCAGAAGCCTTCTCTCGCATCGGCCCCCCCAGATGATG +ATGATCTGTGGTTGCCCCCTCCAGAATATGTCCCATTGAAGGAGCTCACAGGGAAGAAGA +ACATGAGAAACTTCTGTGTTAATGGGGAGGTCAAGGTGTGTAGTCCAAACGGATATTCCT +TTAGGATCTTGCGTCACATTCTGAAATCGTTCGATGAGATCTATTCCGGAAATCAAAGAA +TGATTGGGTTAGTCAAAGTTGTGGTCGGATTGGCCTTGTCTGGAGCCCCAGTCCCCGAGG +GCATGAACTGGGTTTACAAACTAAGGAGAACTCTTATTTTCCAATGGGCAGACTCTAGGG +GTCCTCTGGAAGGGGAAGAGTTGGAGTACTCTCAGGAAATTACCTGGGACGATGACACTG +AATTTGTAGGGTTGCAAATAAGGGTGAGTGCTAGACAATGCCATATCCAAGGTAGAATCT +GGTGCATTAACATGAACTCTAGAGCATGTCAATTGTGGTCTGATATGTCCCTTCAAACTC +ATAGATCTGAGGAGGACAAAGACTCCTCAGTCCTTCTAGAGTAGTCGAATTATATCTCAC +AAGTTCCTCAATTGTCCACCTCTGGAGGAGAGAACACATGGGCTCAACTCCAACCTTCAG +GAGCAATAGAACAAAAACATGTTATGGTGCCGTTGAATCGCTGCATTTTATCAGAGTCAA +ATCAATTACAATTGCACTTTAAGCCTCTTGGATGTGAAAAAAACTATTAACATCCCTCAA +AAGACCTGAGGAAAGATGGTCCCTTGGGCCCTTCTGTTTGTGCCTTTTCTGATCTCTTCA +TTGTGTTTTGGGAAGTTCCCTATCTACACAATACCAGACAAGCTAGGTCCTTGGAGTCCT +ATCGACATACATCATCTCAGTTGTCCCAATAATCTTGTTGTGGAAGACGAGGGGTGTAAT +AGTCTGTCGGGGTTTTCTTACATGGAACTAAAAGTGGGATACATCTCTGCCATAAAAGTG +AACGGGTTCACTTGTACTGGTGTCGTGACGGAAGCCGAGACCTACACTAACTTCGTCGGT +TATGTCACCACCACATTCAAGAGAAAACACTTTCGCCCGATGCCCGATGCATGTAGATCC +GCATACAATTGGAAAATGGCAGGCGATCCTAGGTATGAAGAGTCCCTTCACAACCCATAC +CCTGATTATCATTGGCTGCGGACAGTTAAAACCACCAAGGAGTCTCTTATCATCATCTCG +CCAAGTGTGGCTGACCTAGACCCGTATGACAAATCCCTTCATTCAAGGATTTTTCCTGGG +GGGAAATGCACGGGTCTAACAGTCTCTTCCACCTACTGCTCGACCAACCATGACTACACC +ATCTGGATGCCTGAAAAAGCAGGGCTCGGGACATCTTGTGACATCTTCACCAATAGTAAA +GGGAAGAGAGCATCTAAAGGAGGCAAGACTTGTGGATTTGTGGACGAGAGAGGTTTGTAT +AAGTCCTTGAAAGGAGCTTGTAAGCTCAAACTGTGCGGAGTTTCTGGGCTTAGACTTATG +GATGGAACTTGGGTTGCGATTCAGACATTGGATGAAACCAAATGGTGCTCTCCTGATCAA +CTGGTGAATCTGCATGACTTCCACTCGGATGAGCTTGAGCATCTTGTTGTAGAGGAGTTG +GTTAGAAAGAGGGAGGAATGTCTGGATGCATTAGAATCCATCATGACCACCAAATCAGTA +AGCTTCAGACGTCTAAGCCACCTGAGAAAACTAGTTCCTGGGTTTGGGAAGGCATACACC +ATATTCAACAAAACCCTAATGGAGGCCGATGCCCACTACAAGTCCGTTCGGACTTGGAGC +GAGATCATCCCCTCAAAAGGGTGTTTGAGAGTAGGGGGGAGATGTCATCCTCATGTAAAT +GGAGTATTTTTCAATGGCATCATTCTAGGTCCGGACGGGCATGTGTTGATCCCAGAAATG +CAGTCATCCCTTCTCCAACAACATGTGGAACTGTTGGAGTCCTCTGTGATCCCCCTCATG +CACCCCCTGGCAGATCCTTCAGCAGTTTTCAAAGATGGTGACGAGGCAGAGGATTTTGTT +GAGGTTCATCTCCCAGATGTTCACAAACAGATCTCAGGGGTGGACCTAGGCCTCCCAAGC +TGGGGAAAGTATATGCTGATGATTGCAGGTGCTCTAACGACTCTAATGCTGTTCATCTTC +TTGATGACATGTTGCAAAAGAGTCAACAGGACAAAGTCAATACGACAAGGCCCCAGAGAG +ATGGAAAGAAAAGTATCATTTACTCCCCAAAACAAAAAAGTCGTATCTTCATGGGAGTCT +TACAAGAGCGGAGGCGAGACCAAGCTGTAAG-GAGGGCTGCCCCCCCCACACACTATGCT +CGGAAAACTGTTCCCCTCTGATACATGGAAGAATGTCCTGCCTTGACAATCCCCCACGGA +CTCTGTCCAACAGGGTAAATTTCGA-AGTCAAGAAACTTTCATCGATCATCTCACTCGAC +CAGACACATTCAGGTGGACTTTGATGATGTATGAAGCATTTTTACAGTATCAGCGACTAA +TGGTGCTCTCACCCTCTAAGGACTGGTACTAAAGGTAGCGGACAGGCTGACTGACATCTC +AGACAACCCTGTTTCTTAGCTTGGGCAGAGGTTGTGATAAGCTCCTCTACCTTAAACTAA +AACAGTGATCAGCTGAGAAAACTGATTTGCCTCCTATGAAGGACACAAGCAATAGATCAC +AATCATCTCACATCCCAATAAGTTGTGCATAACTACAAAGGGCTGGGCCATCTAATCTCC +TCAATCAAGAAAAAAACTGTGAGACAGAGAAATCCTCAACAACACTTCTCATCCTGAACA +CTGCACCATGATGATTGATCCAGGGGAGGTTTATGATGACCCCATTGATCCAGTCGAGTC +TGAGAGTGAGCCAAGAGGAAACTCCAATATCCCCAACATCTTGCGAAATTCTGACTACAA +TCTCAACTCCCCTCTGATCGAGGATTCAGCCAAGCTGATGTTAGAATGGCTGAAAACAGG +AAATAGACCTCTCCGGATGACCCTGACCGACAATTGTTCTCGGTCTTACAAAGTTCTGAA +AGATTACCTCAAGAAAGTGGATCTAGGATCTCTCAAAGTGGGCGGAGCTGCAGCACAATC +TATGATCTCTCTTTGGTTATATGGTGCTCACTCTGAATCAAATAGGAGCAGGAGGTGTAT +GACTGACTTAGCTCAATTCTACTCCAAATCTTCCCCTATAGAGAAGCTGTTAAACTTCAC +ACTCGGGAACAGAGGGCTGAGAATCCCCCCTGAGGGAGTCTTAAGCTGTCTTGAGAGGGT +GGATTACGATAAGGCATTTGGGAGGTATTTGGCCAATATATATTCCTCCTATTTGTTCTT +TCATGTGATTACCCTTTACATGAATGCCCTAGATTGGGATGAGGAGAAGACTATCTTGGC +ACTGTGGAGGGAATTGACATCAGTGGATATGGGAAAGGACTTGGTCAAGTTTAAAGATCA +GATATGGGGACTTCTGATTGTGACTAAAGACTTCATATATTCACAAAGCTCTCACTGTCT +CTTTGACAGGAACTATACGCTTATGCTAAAAGACCTTTTTTTGTCCCGGTTCAACTCTCT +GATGATTTTGCTGTCCCCCCCGGAACCCAGATACTCAGATGACTTGATATCTCAGCTGTG +TCAGCTGTACATAGCAGGAGATCAAGTGTTGTCCATGTGCGGAAACTCTGGTTATGAAGT +CATAAAAATCTTGGAGCCGTATGTTGTAAACAGCTTGGTCTATAGAGCGGAAAAATTCAG +GCCTCTTATTCACTCTCTGGGGGACTTCCCTGTTTTTATAAAGGACAAGGTGAATCAACT +TGAGGGGACGTTTGGTCCTAGTGCGAAGAGATTCTTTAAAGTCTTGGATCAATTCGACAA +CATACATGATTTGGTCTTTGTATACGGTTGTTACAGGCATTGGGGGCACCCTTACATAGA +CTATAGAAAGGGTCTGTCAAAATTGTATGACCAGGTCCACATTAAAAAGGTGATAGATAA +GTCCTATCAGGAGTGTTTAGCGAGTGACCTGGCCAAAAGAATCCTCAGGTGGGGGTTCGA +CAAATACTCCAAATGGTATCTAGATCCACGACTTCTTGCGAGGGATCACCCCCTGACTCC +TTATATCAAGACCCAGACATGGCCTCCCAAGCACATAGTGGATTTGGTAGGTGATACCTG +GCATAGACTCCCGATCACCCAGATCTTCGAGATCCCTGAATCAATGGACCCATCAGAGAT +ATTAGATGACAAATCACACTCTTTCACTAGAACAAGGCTAGCGTCATGGCTTTCGGAAAA +CAGAGGAGGACCGGTTCCAAGTGAGAAGGTCATTATTACGGCTCTCTCCAAACCGCCTGT +CAACCCTAGAGAGTTTTTGAAATCTATAGACCTTGGGGGGCTGCCAGACGAAGACTTAAT +CATCGGCTTGAAACCCAAAGAAAGGGAGCTGAAGATCGAAGGCCGATTTTTTGCTTTAAT +GTCTTGGAATTTAAGGCTATACTTCGTTATTACAGAGAAGCTCCTGGCCAATTATATATT +ACCACTCTTTGATGCATTAACCATGACCGACAATTTGAACAAGGTGTTCAAAAAGTTGAT +TGATCGAGTCACCGGACAAGGGCTCTTAGATTATTCGAGGGTTACATATGCTTTTCACTT +GGACTATGAGAAGTGGAACAATCATCAGAGACTAGAATCGACTGAGGATGTATTCTCCGT +CCTTGATCAAGTGTTTGGATTAAAAAGGGTGTTCTCCAGAACTCATGAGTTTTTCCAGAA +GTCTTGGATATATTACTCAGATAGATCTGATCTTATAGGATTGTGGGAGGACCAGATATA +CTGTTTGGACATGTCAAATGGCCCGACGTGCTGGAACGGCCAAGATGGCGGGTTAGAGGG +GTTACGACAGAAGGGCTGGAGTCTGGTCAGCCTACTGATGATAGATCGAGAGTCTCAGAC +CAGGAACACAAGAACTAAGATACTAGCCCAGGGAGACAACCAAGTTCTATGTCCGACATA +CATGTTATCGCCTGGGCTTTCTAGAGAGGGCCTCCTCTACGAGTTGGAGAGTATATCAAG +AAACGCACTCTCAATATATCGAGCCATCGAGGAAGGGGCATCTAAACTGGGGCTCATTAT +AAAGAAGGAGGAGACAATGTGCAGCTATGACTTTCTCATTTATGGAAAAACTCCCTTATT +TCGAGGTAACATTCTGGTGCCTGAATCCAAAAGGTGGGCTAGAGTCTCCTGTATCTCTAA +CGACCAAATAGTCAACCTCGCTAATATAATGTCAACAGTGTCTACCAATGCTTTGACTGT +TGCTCAACACTCTCAGTCTTTGATTAAACCGATGAGGGACTTTCTGCTTATGTCAGTGCA +AGCCGTTTTTCATTACTTGCTATTTAGCCCCATTTTAAAAGGCAGAGTTTACAAAATACT +GAGTGCTGATGGGGATCATTTTCTTTTAGCTATGTCTAGAATAGTTTACCTTGACCCCTC +ATTGGGGGGTGTGTCTGGAATGTCTCTTGGGAGGTTTCACATACGCCAGTTTTCAGACCC +GGTCTCTGAGGGGTTATCTTTTTGGAGAGAGATTTGGTTAAGTTCAAATGAGTCTTGGAT +CCATGCACTTTGTCAAGAAGCAGGAAATCCGGATTTGGGAGAGAGAACACTGGAAAGCTT +CACACGCCTTCTCGAGGATCCCACTACCCTGAATATAAAAGGAGGGGCCAGTCCAACCAT +TCTTCTCAAGGATGCAATCAGAAAGGCCCTGTATGATGAAGTGGATAAGGTGGAGAACTC +CGAGTTTAGAGAAGCAATCCTCTTGTCCAAGACCCATAGGGACAACTTTATACTTTTCTT +GAGATCTGTTGAGCCTCTCTTTCCTAGATTTCTCAGTGAACTCTTCAGCTCTTCCTTCCT +GGGGATTCCTGAATCGATTATTGGGCTGATCCAAAATTCTAGAACGATAAGAAGACAGTT +TAGGAAGAACCTCTCTAGAACCTTAGAAGAGTCTTTCTATAACTCAGAGATTCATGGGAT +CAATCGGATGACACAGACTCCCCAGAGAATAGGAAGAGTTTGGGCCTGCTCTTCTGAGAG +GGCAGATCTCCTAAGAGAAATCTCGTGGGGGAGAAAGGTGGTTGGTACAACCGTCCCTCA +CCCCTCTGAGATGTTGGGGTTGCTTCCTAAATCTTCAATCTCCTGTACTTGTGGCGCAAC +AGGAGGGGGAAATCCTCGAATATCAGTGTCTGTACTCCCGTCCTTTGATCAGTCGTTCTT +TTCCAGAGGCCCTCTGAAGGGATACCTGGGCTCATCCACATCTATGTCAACCCAGCTGTT +CCATGCTTGGGAGAAGGTCACCAATGTTCATGTGGTGAAAAGGGCCCTTTCACTCAAAGA +GTCCATAAATTGGTTCGTCACAAGAAACTCCAATTTGGCTCAAACTTTAATCAGGAATAT +AATGTCTCTGACGGGACCAGACTTCCCGCTGGAAGAGGCCCCCGTCTTTAAGAGAACAGG +ATCAGCTCTGCACAGGTTCAAGTCAGCTAGGTACAGTGAAGGGGGTTATTCCTCTGTCTG +TCCCAATCTCCTCTCTCATATTTCCGTCAGCACAGACACGATGTCTGACCTGACTCAAGA +TGGGAAGAACTATGACTTTATGTTCCAGCCGCTGATGCTTTACGCACAGACATGGACATC +AGAACTGGTGCAGAAGGACATACGGCTGAGAGACTCCACCTTTCACTGGCATCTTCGATG +CAATAAGTGTATAAGGTCCATCGATGACATCACTCTAGAGACTTCCCAGATCTTTGAATT +CCCGGATGTTTCGAAAAGGATATCTAGGATGGTCTCTGGAGCAGTGCCTCACTTTCAAAA +ACTTCCTGATATTCGTCTAAGACCTGGAGACTTTGAGTCTCTAAGTGACAAAGAGAAGTC +ACGCCACATAGGGTCGGCTCAGGGGCTCTTGTATTCGATCCTGGTTGCGATCCACGACTC +TGGGTACAACGATGGAACCATTTTTCCTGTCAACATATATAGCAAGGTCTCCCCGAGAGA +CTATTTAAGAGGTCTCGCCAGAGGAATCTTAATAGGGTCTTCTATCTGCTTTCTAACAAG +AATGACAAACATCAACATCAACAGACCCCTTGAACTAATCTCAGGAGTGATCTCATACAT +CCTCCTTAGATTAGACAATCACCCATCCTTATACATCATGCTCAGAGAACCGTCTCTGAG +AGGAGAAATATTTTCTATTCCCCAAAAGATCCCCGCCGCTTACCCAACCACAATGAAAGA +GGGCAACAGATCTATCCTGTGTTACCTCCAGCATGTGCTCCGCTATGAGCGGGAGGTAAT +TACAGCATCCCCGGAGAATGATTGGTTATGGATCTTCTCTGACTTCAGAAGTTCCAAAAT +GACCTATTTAACCCTTATCACCTATCAGTCTCATCTTTTACTTCAGAGGGTTGAGAAGAA +TCTCTCTAAAAATATGAGGGCCAACCTACGACAGATGAGCTCTCTCATGAGACAAGTATT +GGGCGGGCATGGCGAAGACACCTTAGAATCAGACGAGGACATTCAGAGGTTGTTGAAAGA +CTCATTACGTAGGACAAGATGGGTAGATCAAGAGGTGCGTCATGCAGCCAGAGCCATGAC +AGGGGGTTATAGCCCCAATAAGAAGATGTCTCGCAAAGCGGGGTGTTCAGAATGGGTCTG +CTCTGCCCAACAGGTTGCTGTTTCAACCTCAGCAAATCCAGCCCCTGTCTCTGAGCTGGA +CATCAGGGCTCTCTCCAAAAGACTTCAAAACCCGTTGATTTCAGGTCTGAGAGTTGTTCA +GTGGGCAACAGGCGCTCATTACAAACTCAAACCTATTCTTGATGATCTCAATGTGTTCCC +ATCCTTATGTCTTGTGGTCGGAGACGGGTCAGGGGGGATCTCAAGGGCTGTACTCAATAT +GTTTCCGGACGCTAGGCTCGTGTTCAACAGCCTGTTGGAAGTGAATGACCTGATGGCTTC +AGGGACACATCCGTTGCCTCCTTCAGCAATCATGAGCGGAGGGGATGATATCATATCTAG +GGTGATTGACTTTGACTCCATCTGGGAAAAGCCTTCTGACTTAAGGAACTTGACGACATG +GAGGTACTTCCAGTCGGTTCAAGAACAAGTGAATATGTCCTATGATCTTATTATTTGTGA +CGCAGAGGTCACTGACATCGCATCAATTAATCGAATAACTCTACTAATGTCTGATTTTGC +ATTATCAATAGACGGCCCACTTTATTTAGTTTTCAAAACTTATGGAACCATGCTCGTGAA +TCCTGATTACAGAGCCGTTCAACATCTGTCCAGAGCATTTCCCACAGTCACAGGATTCAT +AACCCAGATGACATCATCCTTCTCGTCTGAGCTATACCTCAGATTCTCTAAAAGGGGAAA +ATTTTTCCGAGATGCAGAATACTTGACTTCTTCTACTATTAGGGAGATGAGCCTTGTATT +GTTCAACTGCAGCAGCCCAAAAAGTGAGATGCAGAGGGCCCGCTCTCTGAATTACCAGGA +CCTTGTAAGAGGATTTCCTGAGGAGATCATATCCAACCCATATAATGAGATGATCATAAC +TCTGATTGACAGTGATGTGGAATCTTTCCTGGTTCATAAGATGGTTGATGACCTAGAGTT +GCAGCGAGGAACTTTGTCTAAAGTCTCCATTATCGTAGCCATCATGATAGTCTTTTCCAA +CAGGGTGTTTAATGTCTCGAAACCGTTGACTGACCCTTTATTCAATCCGCCATCTGACCC +CAAAATCTTGAGACACTTCAATATTTGCTGCAGTACCATGATGTACTTGTCTACTGCTCT +GGGGGATGTTCCAAGCTTTGCTAGACTTCATGACCTGTATAACAGACCAATAACCTACTA +TTTCGGGAAGAAAGTTATCCGAGGGAACATTTATTTATCCTGGAGTTGGTCTGACGACAC +TTCAGTGTTCAAAAGGGTGGCTTGCAATTCTAGTTTGAGCCTCTCGGCTCACTGGATAAG +GCTGATTTACAAAATAGTGAAGACTACCAGACTTGTGGGAAATACGGAGAATCTATCCAG +GGAGGTCGAAAAGCACCTTCGAGGGTACAACAGGTGGATTACCCTAGACGACATAAAATC +CAGATCATCTCTTCTAGATTACAGCTGCCTATAAAACAGGGCATTTGGGGAGAAATACAT +GGACCAACACCTTGAGACAGTATGCCCTGAAAAAAACAAGACCCTGATTCATAACCTCTA +TTTGCTTGATGGTTTTTTTTGTCTTTGTTGTTTTTTTGTTAA---- diff --git a/test/input/TestSnpEff/ref-rabies-JQ685920.fasta b/test/input/TestSnpEff/ref-rabies-JQ685920.fasta new file mode 100644 index 000000000..872a61572 --- /dev/null +++ b/test/input/TestSnpEff/ref-rabies-JQ685920.fasta @@ -0,0 +1,200 @@ +>JQ685920 +ACGCTTAACAACAAAATCAGAGAAGAAGTAGACAGTGTCGTCTACAAAGCAAGAATGTAA +CACCCCTACAATGGATGCCGACAAGATTGTGTTTAAAGTCAATAATCAGGTGGTCTCTCT +GAAGCCTGAAATTATAATAGATCAATATGAATACAAGTACCCGGCTATCAAAGATTTGAA +AAAGCCCAGTATAACCTTAGGAAAAGCCCCTGACTTGAACAAAGCATACAAGTCAATTCT +GTCCGGCATGAATGCAGCCAAGCTTGACCCTGATGATGTATGCTCTTATCTAGCAGCCGC +AATGCAGTTCTTTGAGGGGACATGTCCTGATGACTGGACCAGCTATGGAATCCTGATTGC +ACGGAAGGGAGACAAGATTACTCCAAATTCTCTTGTGGACATAAAACGTACTAATGTGGA +AGGGAACTGGGCTTTGACAGGGGGTATGGAGTTGACGAGAGACCCCACAGTTTCGGAGCA +TGCATCCCTAGTTGGTCTTCTCTTGAGTCTTTACAGGTTAAGCAAAATATCTGGACAAAA +CACCGGCAATTACAAAACGAACATCGCAGACAGAATAGAGCAGATTTTCGAGACGGCCCC +CTTTGTAAAGATCGTAGAGCATCATACCTTGATGACAACCCACAAAATGTGTGCTAACTG +GAGTACTATACCGAACTTCAGATTTCTGGCCGGAACCTACGATATGTTTTTCTCGCGGGT +TGAACATCTGTATTCAGCAATTAGAGTGGGTACAGTTGTAACAGCCTATGAGGACTGCTC +AGGGTTGGTGTCGTTTACAGGGTTTATTAAGCAAATAAATCTCACTGCAAAAGAAGCAAT +ACTATATTTCTTCCACAAGAACTTCGAGGAAGAGATAAGAAGAATGTTCGAGCCGGGGCA +AGAGACGGCAGTTCCTCACTCCTATTTCATACATTTTCGTTCTTTGGGCCTGAGTGGGAA +ATCTCCGTATTCATCAAATGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATG +TTATATGGGTCAAGTGAGATCTTTGAACGCAACGGTTATTGCTACATGTGCCCCGCATGA +GATGTCTGTTCTTGGGGGTTATTTGGGGGAGGAGTTTTTCGGAAAAGGGACTTTTGAGAG +AAGATTCTTTAGGGATGAGAAAGAACTTCAGGAATATGAGGCAGCTGAAGCAACGAAGAC +TGAAATAGCCTTGGCGGATGACGGAACAGTCAATTCTGATGATGAGGACTACTTCTCTGG +TGAAACTAGGAGCCCGGAGGCAGTTTATACTCGAATCATGATGAATGGGGGTAGATTAAA +AAGATCACACATAAGGAGATATGTCTCAGTCAGTTCCAATCATCAAGCTCGCCCTAATTC +GTTCGCTGAGTTTCTAAGTAAGACATACTCTAGTGATTCATAAAGAATTGAACAACAGGA +TTGTAAACATTAACAAATTGTGTACATCCTTCACGAAAAAAACTAACACCCCTCCTCTTG +AACCATCTCAGACATGAGCAAGATTTTTGTTAACCCAAGTGCAATCAGGGCAGGCTTGGC +TGATCTGGAGATGGCAGAGGAAACTGTAGATCTAGTTGCCAAGAACATCGAAGATAATCA +AGCTCATCTCCAAGGAGAACCCATAGAGGTGGACAGTCTTCCTGAAGACATGAGACGGCT +TCAGTTAGACGATGAAAAACCATCTGGCCTCGGTGTGATTGCCAAAGCAGGGGAGAGCAA +ATGTCAGGAAGACTTTCAGATGGACGAGGGGGAGGACCCCGCCCTCTTGTTCCAGTCGTA +CCTAGACAATATTGGAGTTCAAATGGTCAGACAAATGAGATCAGGAGAGAGGTTCCTCAA +GATATGGTCTCAGACTGTTGAGGAGATCATATCCTATGTCACGGTCAATTTCCCCAGTCT +GCCAAGGAAAGCCTCAGAAGACAAGGCTACCCAGACTGCCAACCAGGAACTCAAGAAAAA +GACAATGTCTGTTTCTTCTCAGCGAGAAAGTAAATCATCTAAAGCTAAAATGGCGGCCCA +AACCGCCTCCGGTCCTCCTGCTCTAGAATGGTCCGCCACAAACGAGGAGGATGACCTATC +TGTGGAGGCTGAAATTGCTCACCAGATTGCTGAGAGCTTCTCTAAGAAGTACAAATTCCC +CTCTCGATCATCAGGGATATTCTTGTATAATTTTGAGCAGTTGAAGATGAACCTTGATGA +CATCGTTAAGGAGTCGAAAAATGTGCCAAGCGTAACCCGCTTAGCCCATGACGGATCCAA +ACTCCCTCTGAGGTGTGTGCTGGGGTGGGTTGCTCTAGCCAACTCCAAAAAGTTCCAGCT +GTTAGTTGAGCCTGACAAGCTAAACAAAATAATGCAAGACGACCTGAATCGTTATGTGTT +CTGCTGACCAAACCCTCAAACTCAGTCGTGCTATGCGATCAAATCCAGCCTGCTCCAAAC +CCAACGTGAAAAAAACAGGCAACACCACTGATAAAATGAACTTTCTACGCAAGATAGTAA +AGAACTGTAGAGATGAGGACACTCAGAAGCCCTCTTTCGTGTCGGCTCCTCCAGATGATG +ATGACTTGTGGTTACCCCCTCCGGAATATGTTCCATTGAAGGAACTCTCGGGTAAGAAAA +ACATGAGAAACTTTTGTATAAATGGAGAGGTCAAAGTGTGTAGTCCGAATGGCTATTCAT +TTAGGATCCTGAGGCACATTTTGAAATCATTCGATGAGATCTATTCTGGAAATCAAAGAA +TGATAGGGTTAGTTAAAGTTGTTGTTGGACTTGCGTTGTCAGGAGCCCCAGTCCCGGAGG +GCATGAACTGGGTATACAAATTGAGGAGAACTCTTATCTTCCAGTGGGCTGATTCTAGGG +GCCCTCTAGAGGGGGAGGAGTTAGAATACTCTCAGGAAATTACCTGGGATGACGATGCCG +AATTTGTCGGATTGGAAATACGAGTGAGTGCAAGACAGTGCCATATCCAGGGCAGGATTT +GGTGTATCAACATGAACTCTAGGGCATGTCAACTATGGTCTGACATGTCCCTTCAAACAC +AAAGGTCTGAGGAGGATAAAGACTCTTCAATGCTTCTGGAATAGTCAATTTACATCCTAC +AAATTCCTCAATTGTTTACCTCTGGAGGAGAGAGCACATGGACTTAACTCCAACCTTCAG +GAGCAATAGAACAAAAACATGTTATGGTGCCGTTGAATTGCTGCATTTTATCATAGTCAA +ATCAATTATCTTTACATTTTAAGCCTCTCGGATGTGAAAAAAACTATCAACATCCCTCAA +AAGACTTAAGGAAACATGATCCCTCAGGCTCTTCTGTTTGTGCCTTTTCTATTTCCCTCG +TTGTGTCTCGGGAAATTCCCCATCTACACCATACCGGAAAAGCTCGGCCCTTGGAGTCCC +ATCGACATACATCATCTCAGCTGTCCTAACAATTTGGTTGTGGAGGACGAGGGTTGCGAC +AGTCTGTCAGGGTTTTCTTACATGGAACTGAAGGTGGGTTACATCTCTGCCATAAAGGTG +AATGGGTTCACTTGTACCGGTGTCGTGACAGAAGCTGAGACCTACACCAACTTTGTTGGT +TATGTCACCACCACGTTCAAGAGAAAACACTTCCGCCCTATGCCAGATGCATGCAGAGCT +GCGTACAACTGGAAGACAGCCGGTGACCCTAGATATGAGGAGTCTCTTCACAATCCTTAT +CCTGATTACCATTGGCTACGGACCGTGAAAACCACCAAAGAATCTCTTGTTATCATATCG +CCGAGTGTGGCTGATTTGGACCCATATGACAAATCCCTTCATTCAAGAGTTTTCCCTGGT +GGGAAATGTTTGGGGATAACAATCTCTTCCACCTACTGCTCAACTAACCATGACTATACT +ATCTGGATGCCCGAAGAAGCAAGACTCGGGACATCTTGTGACATTTTTACCAACAGCAAA +GGGAAGAGGGCATCTAAGGGGGGTAGGACTTGCGGATTCGTGGATGAAAGGGGCTTATAT +AAGTCTCTAAAAGGGGCATGTAAACTTAAGCTGTGCGGAGTTCCTGGACTTAGACTTATG +GATGGAACGTGGGTCGCTATTCAGACACCAGGTGAGACCAAATGGTGCTCTCCTGATCAG +CTGGTAAATCTACATGACTTTCGTTCAGATGAGATAGAACATCTCGTCGTGGAGGAGTTG +ATCAAGAAGAGAGAAGAATGTCTAGATGCACTAGAGTCCATCATGACCACCAAATCAGTA +AGCTTCAGACGTCTCAGCAACTTGAGAAAACTTGTCCCTGGGTTTGGAAAGGCATACACT +ATATTCAACAAAACCTTGATGGAGGCTGATGCTCACTACAAGTCAGTTCGGACTTGGGAT +GAAATCATTCCCTCAAAGGGGTGCCTAAGAGTCGGAGGGAGGTGTCATCCTCATGTAAAC +GGAGTGTTTTTCAATGGTATAATTCTGGGTCCGGATGGACATGTCCTGATTCCAGAGATG +CAATCGTCCCTCCTCCAACAGCATATGGAGCTGTTGGAATCCTCTGTAATCCCCCTAATA +CATCCCTTGGCCGACCCATCAACAGTCTTCAAAGACGGTGATGAAGCGGAGGACTTTGTT +GAGGTTCACCTTCCGGACGTTCACAAGCAGGTCTCAGGGGTCGATCTTGGTCTCCCAAAC +TGGGGGAAATATGTGCTGATGAGTGCAGGTGTTCTAGCGACCGTGATACTGACAATCTTC +TTGTTAACATGTTGCAGAAGGGTTAACAGAACAAAACCAAAACAACAAAGTCTTGGGGAG +TCAGGAAGGAAAGTATCGGTTACTCCTCAAAATGGGAAGGTCATGTCTTCATGGGAGTAT +TACAAGAGTGAGGGCAGGACCAGTCTGTGAGTGCTGGCCATCTCCTCCATATCTTGCGTT +CAGAAGATCACCTCTCTTCTAGATCTGGGGGAATCTCTTGTTTTGACAGTCCTTTGTGGA +CTCCGTGCTACAAGGCAAAATTCGAGAGTCAAGAAACTTTCATTAATCATCCCAACTGAT +CAGACACAGTTACGTAGGTTCTGATAATGTATGACGTCTTCTGACAGTGTCAGTGACCAA +TGGTGCTCTCATCCCCCATGGACTGATACCAAAGGTTGTGGACAAACCAACCGATATCTC +AGATAATTCTAGGCTTGAGCCGGGGCAGGGACCGTGGCTAGTCCCCCTACACTAGACTAA +ATAATGGTTAGCTGAGGGAAGCGATTTGCCTCCTATGAAGGACATAAGCAATAGATCACA +ATCATCTTACATCCCGATAAGGTGTGCTTAACTACAAAGGGCTGGGCCATCCAAGCTTTT +CAGCCAAGAAAAAAACTGTGGAATGGAGGAGTAATTAACAACACTTCTCATCCTGAGAAC +TGCACCATGATGCTTGATCCAGGGGAGGTTTATGATGACCCCGTTGATCCGATTGAGTCA +GAGGCCGAGCCGAGAGGGAACCCAACCATTCCCAACATCTTAAGAAACTCTGACTACAAT +CTCAACTCTCCTCTAATAGAGGATCCAGCCAAACTAATGTTAGAATGGTTGAAGACAGGA +AATAGGCCTCTCCGGATAACTTTAACAGACAATTGCTCTAGGTCTTACAAAATTTTGAAG +GATTATTTCAAGAAAGTGGATATAGGATCTATCAAAGTGGGCGGGGCTGCAGCACAATCT +ATGATCTCCCTTTGGTTGCACGGTGCCCACTCTGAATCAAATAGGAGCCGGAAGTGTATA +ACCGACTTGGCTCAGTTCTATTCCAAGTCTTCCCCCATAGAAAAGCTGTTAAATTACACA +CTCGGAAATCGAGGGCTGAGGATCCCCCCAGAGGGGGTCCTAAGTTGCCTTGAGAGGGTC +GATTACGATAAAGCATTTGGGAGGTATCTGGTTAACATATACTCCTCTTACTTATTCTTT +CACGTGATCACCCTTTACATGAACGCCTTGGACTGGGATGAGGAGAAGACCATTCTAGCA +CTGTGGAGGGATTTAACCTCAATAGATATAGGAAAGGACTTGGTCAAGTTTAAAGATCAA +ATATGGGGATTGCTAATTGTGACCAAGGATTTTGTGTACTCACAAAGTTCTAACTGCCTT +TTTGATAGAAACTACACGCTTATGCTTAAAGACCTTTTTTTGTCTCGGTTCAACTCTCTA +ATGATTCTCCTTTCTCCTCCGGAACCCAGATATTCAGACGACCTGATATCCCAGCTGTGT +CAGCTATATATCGCTGGAGATCATGTCTTGTCTATGTGCGGGAACTCTGGTTATGAGGTC +ATTAAAATATTGGAGCCGTACGTTGTGAACAGTTTAGTCCAGAGGGCAGAAAAGTTTAGG +CCTCTCATTCATTCCCTAGGGGACTTCCCTGTATTTATAAGAGATAAGGTAGGTCAGCTT +GAAGGAACATTTGGTCCCAGTGCAAAAAGGTTCTTCAGGGTTCTGGATCAATTCGACAAT +ATACACGACTTAGTCTTTGTATACGGCTGTTATAGGCATTGGGGGCATCCTTACATAGAT +TATAGAAAGGGCTTATCGAAGCTATATGATCAAGTCCACATCAAGAAGGTGATAGATAAG +ACTTACCAGGAGTGTTTGGCCAGCGACCTGGCCAAAAGGATCCTCAGGTGGGGATTTGAC +AAGTATTCCAAATGGTATATTGATTCAAGACTCCTCTCAAAGGACCACCCCCTAACTCCT +TATATCAAAACCCAGACGTGGCCTCCAAAACATGTGGTAGATTTGGTGGGTGACACTTGG +CATAAGCTCCCGATCACCCAGATCTTCGAGATCCCCGAATCAATGGACCCATCTGAGATA +CTAGATGATAAATCACACTCTTTTACTAGAACTAGACTAGCGTCCTGGCTATCAGAGAAC +AGAGGAGGGCCGGTCCCCAGCGAGAAGGTCATTATCACTGCTCTTTCCAAGCCTCCTGTC +AACCCCAGGGAATTTCTGAAATCTATAGACCTGGGAGGATTGCCGGACGAGGATTTGATA +ATCGGCCTCAAGCCTAAGGAAAGAGAGTTGAAGATAGAAGGTCGGTTTTTTGCCTTGATG +TCCTGGAATCTAAGGCTGTATTTTGTCATCACAGAAAAGCTCCTAGCCAATTATATCTTG +CCACTTTTTGACGCACTGACTATGACAGACAACTTGAACAAAGTGTTTAAAAAGCTGATC +GACAGAGTCACCGGACAGGGGCTTTTAGACTACTCCAGAGTTACATACGCTTTTCACCTG +GACTATGAAAAGTGGAACAATCATCAGAGGCTGGAGTCGACAGAGGATGTATTTTCTGTA +CTTGATCAAGTATTCGGATTAAAGAGGGTGTTTTCCAGAACTCATGAGTTTTTTCAGAAG +TCTTGGATCTATTACTCAGATAGATCCGACCTCATCGGGTTATGGGAAGATCAAATATAC +TGTTTGGACATGTCAAACGGCCCGACATGCTGGAACGGCCAGGATGGCGGGCTAGAGGGT +TTGCGACAGAAAGGCTGGAGTCTGGTTAGCCTATTAATGATAGATCGTGAATCTCAAACC +AGGAACACAAGAACTAAAATACTAGCTCAGGGAGACAACCAAGTTCTGTGTCCGACATAT +ATGCTGTCGCCGGGGCTCTCTCGAGAGGGGCTTCTCTACGAGTTGGAGAGCATATCAAGA +AACGCTCTCTCGATATATCGTGCCATCGAGGAAGGGGCATCCAAACTGGGGCTCATCATA +AAGAAGGAAGAGACCATGTGTAGCTATGACTTTCTCATCTATGGGAAAACTCCTTTATTT +CGAGGTAACATCTTGGTGCCTGAGTCCAAAAGATGGGCTAGGGTCTCCTGTATCTCCAAC +GACCAGATAGTCAACCTTGCCAATATAATGTCAACTGTATCCACTAATGCATTGACTGTC +GCCCAACACTCTCAGTCTTTGATTAAACCGATGAGGGACTTCCTGCTTATGTCAGTACAA +GCTGTCTTCCATTACTTGCTGTTTAGCCCCATCTTGAAAGGCAGAGTTTACAAGATCCTG +GGTGCCGAGGGGGACAATTTTCTTCTAGCTATGTCTAGGATAATTTATTTAGACCCCTCA +TTGGGGGGAGTATCTGGAATGTCTCTTGGAAGGTTCCACATACGTCAGTTCTCAGATCCT +GTCTCAGAAGGGTTGTCCTTCTGGAGAGAGATATGGCTGAGCTCCAGTGAGTCTTGGGTT +CATGCACTGTGTCAAGAGGCAGGAAACCCAGATCTGGGAGAGAGAACACTGGAAAGCTTC +ACTCGCCTGCTCGAGGATCCTACTACCCTAAACATTAAAGGAGGGGCCAGTCCTACCATT +CTTCTCAAAGATGCGATCAGAAAAGCCTTGTATGATGAGGTAGACAAGGTGGAGAACTCA +GAGTTTAGGGAAGCGATCCTCTTGTCTAAGACTCATAGGGATAATTTCATACTCTTCTTG +AAATCTGTTGAGCCTTTGTTCCCCCGATTTCTCAGTGAGCTCTTCAGTTCATCATTCCTG +GGGATTCCCGAATCAATCATTGGGTTGATACAAAACTCCAGAACAATAAGAAGACAGTTT +AGGAGGAGTCTCTCAAGAACCTTGGAAGAGTCCTTTTATAACTCAGAGATCCATGGGATC +AACCGGATGACCCAGACTCCCCAGAGGGTCGGGAGAGTTTGGCCCTGCTCTTCTGAGAGG +GCAGACCTCCTAAGAGAAATCTCATGGGGAAGGAAGGTGGTCGGCACAACAGTTCCTCAC +CCCTCTGAGATGTTGGGGTTGATTCCCAAATCCTCTATTTCCTGTACTTGTGGAGTAACA +GGAGGGGGAAACCCTCGAATATCAGTGTCTGTGCTTCCATCTTTTGATCAGTCATTTTTT +TCGAGAGGCTCTCTGAAAGGATATCTGGGCTCATCCACTTCCATGTCGACCCAGCTATTC +CATGCCTGGGAGAAAGTCACCAATGTTCATGTGGTGAAAAGAGCTCTGTCACTCAAGGAA +TCTATAAACTGGTTCATTACAAGAGACTCAAATTTGGCCCAAACTTTGATTAGGAACATA +ATGTCTCTGACAGGCCCAGATTTTCCTCTGGAAGAGGCCCCTGTCTTCAAGAGGACAGGG +TCTGCCCTGCATAGGTTCAAGTCGGCTAGATACAGTGAAGGGGGGTACTCTTCAGTTTGT +CCAAATCTCCTCTCTCACATCTCTGTCAGTACAGACACGATGTCTGATTTGACTCATGAT +GGGATAAATTATGACTTTATGTTTCAGCCGTTGATGCTTTATGCGCAAACATGGACATCA +GAACTGGTGCAAAAAGATACACGGCTGAAAGATTCTACCTTTCACTGGCATCTTCGATGT +AACAAGTGCATAAGGCCCATCGATGATATCACCCTTGATACCTCTCAGATCTTCGAGTTC +CCAGATGTCTCAAGAAGGATATCTAGGATGGTTTCTGGAGCTGTGCCTCATTTCAGAAAG +CTTCCTGATATTCGTTTGAGACCAGGTGACTTCGAATCTCTAAGTGGTAAAGAGAAGTCT +CGCCACATAGGATCGGCCCAAGGACTCTTGTATTCAATCTTGGTTGCGATCCATGACTCT +GGGTATAATGATGGAACCATTTTCCCTGTCAACATATATAGCAAGGTCTCCCCTAGAGAC +TATTTGAGAGGGCTTGCGAGAGGAGTCTTGATAGGGTCCTCGATTTGCTTCTTGACAAGA +ATGACAAACATTAACATCAATAGACCTCTTGAACTGATCTCAGGAGTGATATCATACATC +CTCTTGAGACTGGACAATCACCCATCCTTGTATATAATGCTCAGAGAGCCATCCCTTAGA +GGAGAGATATTTTCTATTCCCCAAAAGGTCCCCGCTGCTTACCCGACCACGATGAAAGAA +GGTAACAGATCTGTTCTGTGCTACCTCCAACATGTGTTGCGCTACGAACGAGAGGTGATC +ACTGCATCTCCGGAGAATGACTGGTTGTGGATCTTCTCGGACTTTAGAAGCTCTAAGATG +ACATATCTGACTCTCATCACCTACCAGTCTCATCTGTTACTTCAGAAGGTTGAGAAGAAT +CTCTCCAAGAGTATGAGAGCCAACCTACGGCAAATGAGCTCTCTGATGAGGCAGGTGCTG +GGTGGCCATGGTGAAGACACCTTAGAGTCAGACGAAGACATCCAGAGGCTGTTAAGAGAC +TCTCTGCGTAGGACAAGGTGGGTAGACCAGGAAGTGCGCCATGCAGCGAGAACTATGACA +GGAGTTTACAGCCCCACCAAGAAGATGTCACGCAAAGCCGGGTGTTCAGAATGGGTCTGC +TCTGCACAGCAGGTTGCGGTTTCGACCTCATCGAACCCAGCCCCTGTTTCAGAGCTGGAC +GTCAGAGCCCTCTCCAAAAGGCTTCAAAACCCGTTGATCTCTGGACTGAGAGTGGTTCAG +TGGGCGACAGGGGCCCATTATAAGCTCAAACCTATTCTGGATGATCTCAATGTCTTTCCA +TCTCTATGTCTTGTGGTCGGGGATGGGTCAGGGGGGATATCAAGAGCAGTACTTAACATG +TTTCCTGATGCCAAACTCGTATTCAACAGCCTGTTAGAAGTAAATGACCTGATGGCATCA +GGAACACATCCGTTGCCCCCCTCAGCAATCATGAGTGGAGGAGATGACATCATATCCAGG +GTAATTGGCTTCGACTCCATCTGGGAGAAACCTTCTGACTTGAGGAACTTGACCACATGG +AGGTACTTCCAGTCAGTCCAAGAACAAGTAAATATGTCATACGACCTTATCATTTGTGAT +GCAGAGGTCACTGACATTGCATCAATCAACCGAATAACCCTGCTAATGTCTGATTTTGCA +TTGTCCATAGACGGCCCACTTTATCTGGTTTTCAAAACTTACGGTACCATGCTTGTAAAC +CCTGACTACAGAGCAATTCAACATCTATCCAGAGCATTTCCTGCGGTCACTGGATTCATA +ACTCAGATGACCTCGTCCTTCTCATCCGAGCTATATCTCAGATTCTCAAAGCGGGGGAAG +TTCTTCCGGGATGCAGAGTATTTGACTTCTTCCACCCTTCGAGAAATGAGCCTTGTATTG +TTCAACTGTAGCAGCCCCAAGAGTGAGATGCAGAGAGCCCGCTCTCTAAATTACCAAGAT +CTTGTAAGAGGATTCCCAGATGAGATCATATCCAATCCTTACAGCGAAATGATCATAACT +CTGATTGACAGTGATGTAGAATCTTTCCTGGTTCACAAAATGGTAGATGATCTAGAGTTG +CAGCGAGGAACTTTGTCTAAAGTTTCTATCATTATAGCCATCATGATAGTCTTCTCCAAT +AGGGTGTTCAATGTCTCAAAACCACTGACTGACCCTTTATTCTATCCGCCATCTGATCCC +AAGATCTTGAGACACTTCAACATATGCTGCGGCACCATGATGTACTTGTCTACCGCCTTA +GGGGATGTGCCCAGCTTCTCGAGACTTCATGACCTGTACAACAGACCCATAACTTATTAT +TTTAGGAAGCAAGTCATCCGGGGGAGTGTTTACCTGTCCTGGAGTTGGTCTGATGACACT +TCAGTGTTTAAAAGGGTGGCTTGCAACTCTAGCTTGAGTCTCTCATCTCACTGGATCAGG +TTGATTTACAAAATAGTGAAGACCACCAGACTTGTAGGAAGGGTCGAGGACCTGTCCAGG +GAGGTAGAGAGGCACCTTCGGGGGTACAACAGATGGATCACCCTCGATGACATTAGATCC +CGATCATCTCTGTTAGATTATAGCTGCTTGTAAGACTAAGCACTCTCGAGGGAATGTACA +GACTAAGATTATGGGACGGTGTAACCTGAAAAAAACAAGATCCCGATTCATAACTTCTGT +TTACTTGATTGTTTTTCCATCTTTATTGTTTTTTTGTTAAGCGT diff --git a/test/input/TestSnpEff/vphaser2.RBV16.mapped.txt.gz b/test/input/TestSnpEff/vphaser2.RBV16.mapped.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e12d4e6f9fe1c70c567d1bbee84e26fa42fec75 GIT binary patch literal 510 zcmVd=Rxvg%ZDDY5Wn?aNcys`qlTC`;Fc5{; zn!Ci*ze?J9dSK}7(BuTO83-Ye`$L{AyX`<$!MIcy>r>UMr;pd~9N)Nbizx49Ewm(= zmES*q6JI4`E zjCq#(T0X(46k2`^j*8>+XZMJ1MBN*8V;qlP2O{_IPmE*pe_}NG5U5$>tt(!XTcQD1 z>1Om?hc?mc_MQmKY~_BN4aHUnFD$`|LUL$Qv!{EuYPyvIr+&~YsGoKSu$?XY|ZO~BJy{ZjJXlEf%l96y87}gn1P@2o(;!1L; zQz7i|nb9jb&hFJYmeFRNlCJfuTk@>k@C=ynJ=V3eN;YY1oTwT#7dyw$T}` zFs6FIYH*t{c8(OlBt>BzG$(0{Y`YqOIePTJ)M8d>G(8|VdAA~4)PMyvR1qjI*)W0C zu62)BSjEs*hz6Zf=55slJsOlm9S%aaGuiIzkUgKv%O3y$0RR630PLilYDEM909;?| An*aa+ literal 0 HcmV?d00001 diff --git a/test/integration/test_intrahost.py b/test/integration/test_intrahost.py index ee913620c..83a8d2030 100644 --- a/test/integration/test_intrahost.py +++ b/test/integration/test_intrahost.py @@ -12,10 +12,18 @@ # module-specific import intrahost +import interhost +import tools.mafft import util.file +import util.vcf import test +from test import TestCaseWithTmp import tools +# third-party +import pytest +from mock import patch + class TestPerSample(test.TestCaseWithTmp): ''' This tests step 1 of the iSNV calling process (intrahost.vphaser_one_sample), which runs V-Phaser2 on @@ -86,3 +94,65 @@ def test_vphaser_one_sample_3libs_and_chi2(self): intrahost.vphaser_one_sample(inBam, refFasta, outTab, vphaserNumThreads=test._CPUS, minReadsEach=6, maxBias=3) expected = os.path.join(myInputDir, 'vphaser_one_sample_3libs_expected.txt') self.assertEqualContents(outTab, expected) + +class TestSnpEff(TestCaseWithTmp): + @pytest.fixture(autouse=True) + def capsys(self, capsys): + self.capsys = capsys + + def test_snpeff(self): + temp_dir = tempfile.gettempdir() + input_dir = util.file.get_test_input_path(self) + + ref_fasta = os.path.join(input_dir,"ref-rabies-JQ685920.fasta") + assembly_fasta = os.path.join(input_dir,"RBV16.fasta") + isnv_calls = os.path.join(input_dir,"vphaser2.RBV16.mapped.txt.gz") + + # align sample to reference to create MSA + msa_fasta = util.file.mkstempfname('.fasta') + expected_msa_fasta = os.path.join(input_dir,"msa.fasta") + args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"] + args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + test.assert_equal_contents(self, msa_fasta, expected_msa_fasta) + + # merge (one) VCF to merged vcf + merged_vcf = os.path.join(temp_dir,"merged.vcf.gz") + expected_merged_vcf = os.path.join(input_dir,"merged.vcf.gz") + args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta, "--strip_chr_version", "--parse_accession"] + args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + vcf = util.vcf.VcfReader(merged_vcf) + expected_vcf = util.vcf.VcfReader(expected_merged_vcf) + rows = list(vcf.get()) + expected_rows = list(expected_vcf.get()) + #self.assertEqual(rows, expected_rows) + + # run snpEff against merged VCF to predict SNP effects + eff_vcf = os.path.join(temp_dir,"ann_eff.vcf.gz") + expected_eff_vcf = os.path.join(input_dir,"ann_eff.vcf.gz") + args = [merged_vcf, "JQ685920", eff_vcf, "--emailAddress=test@example.com"] + with self.capsys.disabled(): + args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + vcf = util.vcf.VcfReader(eff_vcf) + expected_vcf = util.vcf.VcfReader(expected_eff_vcf) + rows = list(vcf.get()) + expected_rows = list(expected_vcf.get()) + #self.assertEqual(rows, expected_rows) + + # create tabular iSNV output + eff_txt = os.path.join(temp_dir,"ann_eff.txt.gz") + expected_eff_txt = os.path.join(input_dir,"ann_eff.txt.gz") + args = [eff_vcf, eff_txt] + args = intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),util.file.read_tabfile(expected_eff_txt)): + for colout, colexpected in zip(outrow, expectedrow): + # if it casts to float, perform approx comparison + try: + f1=float(colout) + f2=float(colexpected) + self.assertAlmostEqual(f1, f1) + except ValueError: + self.assertEqual(sorted(sorted(colout.split(","))), sorted(sorted(colexpected.split(",")))) diff --git a/tools/snpeff.py b/tools/snpeff.py index 993c7c808..409551df3 100644 --- a/tools/snpeff.py +++ b/tools/snpeff.py @@ -22,9 +22,9 @@ _log = logging.getLogger(__name__) TOOL_NAME = 'snpeff' -TOOL_VERSION = '4.1l' +TOOL_VERSION = '4.3.1t' -URL = 'http://downloads.sourceforge.net/project/snpeff/snpEff_v4_1l_core.zip' +URL = 'http://downloads.sourceforge.net/project/snpeff/snpEff_v4_3t_core.zip' class SnpEff(tools.Tool): @@ -43,7 +43,7 @@ def __init__(self, install_methods=None, extra_genomes=None): def version(self): return "4.1" - def execute(self, command, args, JVMmemory=None, stdin=None, stdout=None): # pylint: disable=W0221 + def execute(self, command, args, JVMmemory=None, stdin=None, stdout=None, stderr=None): # pylint: disable=W0221 if not JVMmemory: JVMmemory = self.jvmMemDefault @@ -59,7 +59,7 @@ def execute(self, command, args, JVMmemory=None, stdin=None, stdout=None): # ] + args _log.debug(' '.join(tool_cmd)) - return util.misc.run_and_print(tool_cmd, stdin=stdin, buffered=True, silent=("databases" in command), check=True) + return util.misc.run_and_print(tool_cmd, stdin=stdin, stderr=stderr, buffered=True, silent=("databases" in command), check=True) def has_genome(self, genome): if not self.known_dbs: @@ -76,7 +76,7 @@ def download_db(self, dbname, verbose=False): self.known_dbs.add(dbname) self.installed_dbs.add(dbname) - def create_db(self, accessions, emailAddress, JVMmemory): + def create_db(self, accessions, emailAddress=None, JVMmemory=None): sortedAccessionString = ", ".join([util.genbank.parse_accession_str(acc) for acc in sorted(accessions)]) databaseId = hashlib.sha256(sortedAccessionString.encode('utf-8')).hexdigest()[:55] @@ -109,27 +109,30 @@ def create_db(self, accessions, emailAddress, JVMmemory): self.execute('build', args, JVMmemory=JVMmemory) def available_databases(self): - command_ps = self.execute("databases", args=[]) - - split_points = [] - keys = ['Genome', 'Organism', 'Status', 'Bundle', 'Database'] - self.installed_dbs = set() - self.known_dbs = set() - for line in command_ps.stdout.decode("utf-8").splitlines(): - line = line.strip() - if not split_points: - if not line.startswith('Genome'): - raise Exception() - split_points = list(line.index(key) for key in keys) - elif not line.startswith('----'): - indexes = split_points + [len(line)] - row = dict((keys[i], line[indexes[i]:indexes[i + 1]].strip()) for i in range(len(split_points))) - self.known_dbs.add(row['Genome']) - if row.get('Status') == 'OK': - self.installed_dbs.add(row['Genome']) - yield row - - def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress, JVMmemory=None): + # do not capture stderr, since snpEff writes 'Picked up _JAVA_OPTIONS' + # which is not helpful for reading the stdout of the databases command + with open(os.devnull, "wb") as devnull: + command_ps = self.execute("databases", args=[], stderr=devnull) + + split_points = [] + keys = ['Genome', 'Organism', 'Status', 'Bundle', 'Database'] + self.installed_dbs = set() + self.known_dbs = set() + for line in command_ps.stdout.decode("utf-8").splitlines(): + line = line.strip() + if not split_points: + if not line.startswith('Genome'): + raise Exception() + split_points = list(line.index(key) for key in keys) + elif not line.startswith('----'): + indexes = split_points + [len(line)] + row = dict((keys[i], line[indexes[i]:indexes[i + 1]].strip()) for i in range(len(split_points))) + self.known_dbs.add(row['Genome']) + if row.get('Status') == 'OK': + self.installed_dbs.add(row['Genome']) + yield row + + def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress=None, JVMmemory=None): """ Annotate variants in VCF file with translation consequences using snpEff. """ diff --git a/util/file.py b/util/file.py index df38b9fb2..66f87e728 100644 --- a/util/file.py +++ b/util/file.py @@ -371,7 +371,7 @@ def read_tabfile_dict(inFile): # truncate the row to the header length, and only include extra items if they are not spaces # (takes care of the case where the user may enter an extra space at the end of a row) row = row[:len(header)] + [item for item in row[len(header):] if len(item)] - assert len(header) == len(row) + assert len(header) == len(row), "%s != %s" % (len(header), len(row)) yield dict((k, v) for k, v in zip(header, row) if v) diff --git a/util/misc.py b/util/misc.py index 0716879af..fdc762cec 100644 --- a/util/misc.py +++ b/util/misc.py @@ -220,7 +220,7 @@ def run(args, stdin=None, stdout=None, stderr=None, shell=False, os.remove(stderr_fn) -def run_and_print(args, stdout=None, stderr=None, +def run_and_print(args, stdout=None, stderr=subprocess.STDOUT, stdin=None, shell=False, env=None, cwd=None, timeout=None, silent=False, buffered=False, check=False, loglevel=None): @@ -238,7 +238,7 @@ def run_and_print(args, stdout=None, stderr=None, args, stdin=stdin, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, + stderr=stderr, env=env, cwd=cwd, timeout=timeout, @@ -268,7 +268,7 @@ def run_and_print(args, stdout=None, stderr=None, raise(e) else: result = run(args, stdin=stdin, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, env=env, cwd=cwd, + stderr=stderr, env=env, cwd=cwd, timeout=timeout, check=check) if not silent and not loglevel: print(result.stdout.decode('utf-8')) @@ -281,7 +281,7 @@ def run_and_print(args, stdout=None, stderr=None, 'CompletedProcess', ['args', 'returncode', 'stdout', 'stderr']) process = subprocess.Popen(args, stdin=stdin, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, env=env, + stderr=stderr, env=env, cwd=cwd) output = [] while process.poll() is None: