diff --git a/DEVELOPMENT_NOTES.md b/DEVELOPMENT_NOTES.md index 16e8738b4..b0ad2fefb 100644 --- a/DEVELOPMENT_NOTES.md +++ b/DEVELOPMENT_NOTES.md @@ -20,6 +20,7 @@ When upgrading the GATK to a new version: - in tools/gatk.py change TOOL_VERSION_TUPLE at the top - in travis/install-gatk.sh change GATK_VERSION at the top - in easy-deploy-script/easy-deploy-viral-ngs.sh +- in docker/rundocker.sh ### (Automated) testing [Travis CI](https://travis-ci.org/broadinstitute/viral-ngs) performs automated unit and integration tests for viral-ngs on each branch and pull request. Unit tests are run on each new branch commit, and longer integration tests are performed on pull requests to help ensure the stability of the `master` branch. Pull requests are gated to ensure merging to `master` is allowed only if all tests pass. The Travis configuration is specified in `.travis.yml`, and relies on files stored within `viral-ngs/travis/`. diff --git a/conftest.py b/conftest.py index af55e2930..52860ba64 100644 --- a/conftest.py +++ b/conftest.py @@ -120,4 +120,4 @@ def pytest_terminal_summary(self, terminalreporter, exitstatus): widths = [max(map(len, col)) for col in zip(*rows)] for row in rows: writer.write(" ".join((val.ljust(width) for val, width in zip(row, widths)))) - writer.line() + writer.line() \ No newline at end of file diff --git a/docker/rundocker.sh b/docker/rundocker.sh index f3e014434..66f55e2f4 100755 --- a/docker/rundocker.sh +++ b/docker/rundocker.sh @@ -3,7 +3,7 @@ # A wrapper script to run viral-ngs docker images # The following paths have to be modified according to end-user environment NOVOALIGN_PATH="/opt/novocraft" # Directory where novoalign.lic license file can befound -GATK_PATH="/opt/GenomeAnalysisTK-3.6" # Directory where the correct GATK jar file can be found +GATK_PATH="/opt/GenomeAnalysisTK-3.8" # Directory where the correct GATK jar file can be found IMAGE_HASH_OR_TAG="local/viral-ngs:1.16.0" # This can be found by running this command 'docker images' 
DATA_DIR="$1"; shift GID=$(id -g $USER) diff --git a/illumina.py b/illumina.py index fefa3ee7f..6ac812617 100755 --- a/illumina.py +++ b/illumina.py @@ -553,7 +553,6 @@ def get_flowcell(self): log.warn("The provided flowcell ID is longer than 15 characters. Is that correct?") return fc - @util.misc.memoize def _get_rundate_obj(self): """ Access the text of the node in the RunInfo.xml file diff --git a/interhost.py b/interhost.py index 935447eec..84910b415 100755 --- a/interhost.py +++ b/interhost.py @@ -328,7 +328,7 @@ def parser_snpEff(parser=argparse.ArgumentParser()): parser.add_argument("inVcf", help="Input VCF file") parser.add_argument("genomes", nargs='+', help="genome name (snpEff db name, or NCBI accessions)") parser.add_argument("outVcf", help="Output VCF file") - parser.add_argument("emailAddress", + parser.add_argument("--emailAddress", help="""Your email address. To access the Genbank CoreNucleotide database, NCBI requires you to specify your email address with each request. 
In case of excessive usage of the E-utilities, NCBI will attempt to contact diff --git a/intrahost.py b/intrahost.py index de5615f40..1d7c6d89e 100755 --- a/intrahost.py +++ b/intrahost.py @@ -485,7 +485,31 @@ def merge_to_vcf( guessed_samples = samplenames_from_isnvs + list(samplenames_from_alignments-(refnames|set(samplenames_from_isnvs))) log.info("guessed sample names %s" % guessed_samples) - samples = samples or guessed_samples + samples = samples if samples is not None and len(samples)>0 else guessed_samples + + samp_to_isnv = {} + # if we had to guess sample names, match them up to isnv files + if len(guessed_samples)>0: + matched_samples = [] + matched_isnv_files = [] + for sample in samples: + sample_found=False + for isnvs_file in isnvs: + for row in util.file.read_tabfile(isnvs_file): + if sample==sampleIDMatch(row[0]): + samp_to_isnv[sample] = isnvs_file + sample_found=True + matched_samples.append(sample) + matched_isnv_files.append(isnvs_file) + break + if sample_found: + break + samples = matched_samples + isnvs = matched_isnv_files + else: + samp_to_isnv = dict(zip(samples, isnvs)) + + log.info(samp_to_isnv) # get IDs and sequence lengths for reference sequence with util.file.open_or_gzopen(refFasta, 'r') as inf: @@ -567,13 +591,7 @@ def merge_to_vcf( # to the assemblies # if we had to guess samples only check that the number of isnv files == number of alignments - if len(guessed_samples)>0: - if not (number_of_aligned_sequences - 1) == num_isnv_files: - raise LookupError( - """The number of isnv files provided (%s) and must equal the number of sequences - seen in the alignment (%s) (plus an extra reference record in the alignment). 
- %s does not have the right number of sequences""" % (num_isnv_files,number_of_aligned_sequences - 1,fileName)) - else: + if len(guessed_samples)==0: if not (number_of_aligned_sequences - 1) == num_isnv_files == len(samples): raise LookupError( """The number of isnv files provided (%s) and must equal the number of sequences @@ -581,8 +599,6 @@ def merge_to_vcf( as well as the number of sample names provided (%s) %s does not have the right number of sequences""" % (num_isnv_files,number_of_aligned_sequences - 1,len(samples),fileName)) - samp_to_isnv = dict(zip(samples, isnvs)) - # one reference chrom at a time with open(refFasta, 'r') as inf: for ref_sequence in Bio.SeqIO.parse(inf, 'fasta'): @@ -611,6 +627,7 @@ def merge_to_vcf( for sampleName in samplesToUse: if seq.id == sampleName: samp_to_seqIndex[sampleName] = seq.seq.ungap('-') + break if not len(samp_to_seqIndex) == len(samplesToUse): raise LookupError( diff --git a/pipes/WDL/workflows/isnvs_merge_to_vcf.wdl b/pipes/WDL/workflows/isnvs_merge_to_vcf.wdl new file mode 100644 index 000000000..e0c68e19d --- /dev/null +++ b/pipes/WDL/workflows/isnvs_merge_to_vcf.wdl @@ -0,0 +1,18 @@ +import "tasks_interhost.wdl" as interhost +import "tasks_intrahost.wdl" as tasks_intrahost + +workflow isnvs_merge_to_vcf { + File reference_fasta + Array[File]+ assemblies_fasta # one per genome + + call interhost.multi_align_mafft_ref as mafft { + input: + reference_fasta = reference_fasta, + assemblies_fasta = assemblies_fasta + } + call tasks_intrahost.isnvs_vcf { + input: + perSegmentMultiAlignments = mafft.alignments_by_chr, + reference_fasta = reference_fasta + } +} diff --git a/pipes/WDL/workflows/tasks/tasks_intrahost.wdl b/pipes/WDL/workflows/tasks/tasks_intrahost.wdl index 43ee4e429..03e878f01 100644 --- a/pipes/WDL/workflows/tasks/tasks_intrahost.wdl +++ b/pipes/WDL/workflows/tasks/tasks_intrahost.wdl @@ -7,7 +7,7 @@ task isnvs_per_sample { Int? minReadsPerStrand Int? 
maxBias - String sample_name = basename(basename(mapped_bam, ".bam"), ".all") + String sample_name = basename(basename(basename(mapped_bam, ".bam"), ".all"), ".mapped") command { intrahost.py vphaser_one_sample \ @@ -29,61 +29,31 @@ task isnvs_per_sample { } } + task isnvs_vcf { Array[File] vphaser2Calls # vphaser output; ex. vphaser2.${sample}.txt.gz Array[File] perSegmentMultiAlignments # aligned_##.fasta, where ## is segment number File reference_fasta - Array[String] snpEffRef # list of accessions to build/find snpEff database + Array[String]? snpEffRef # list of accessions to build/find snpEff database Array[String]? sampleNames # list of sample names - String emailAddress # email address passed to NCBI if we need to download reference sequences + String? emailAddress # email address passed to NCBI if we need to download reference sequences + Boolean naiveFilter=false command { + set -ex -o pipefail + SAMPLES="${sep=' ' sampleNames}" if [ -n "$SAMPLES" ]; then SAMPLES="--samples $SAMPLES"; fi - intrahost.py merge_to_vcf \ - ${reference_fasta} \ - isnvs.vcf.gz \ - $SAMPLES \ - --isnvs ${sep=' ' vphaser2Calls} \ - --alignments ${sep=' ' perSegmentMultiAlignments} \ - --strip_chr_version \ - --parse_accession - - interhost.py snpEff \ - isnvs.vcf.gz \ - ${sep=' ' snpEffRef} \ - isnvs.annot.vcf.gz \ - ${emailAddress} - - intrahost.py iSNV_table \ - isnvs.annot.vcf.gz \ - isnvs.annot.txt.gz - } - - output { - Array[File] isnvFiles = ["isnvs.vcf.gz", "isnvs.vcf.gz.tbi", "isnvs.annot.vcf.gz", "isnvs.annot.txt.gz", "isnvs.annot.vcf.gz.tbi"] - } - runtime { - memory: "4 GB" - docker: "quay.io/broadinstitute/viral-ngs" - } -} - -task isnvs_vcf_filtered { - Array[File] vphaser2Calls # vphaser output; ex. 
vphaser2.${sample}.txt.gz - Array[File] perSegmentMultiAlignments # aligned_##.fasta, where ## is segment number - File reference_fasta + providedSnpRefAccessions="${sep=' ' snpEffRef}" + if [ -n "$providedSnpRefAccessions" ]; then + snpRefAccessions="$providedSnpRefAccessions"; + else + snpRefAccessions="$(python -c "from Bio import SeqIO; print(' '.join(list(s.id for s in SeqIO.parse('${reference_fasta}', 'fasta'))))")" + fi - Array[String] snpEffRef # list of accessions to build/find snpEff database - Array[String]? sampleNames # list of sample names - String emailAddress # email address passed to NCBI if we need to download reference sequences - Boolean naiveFilter - - command { - SAMPLES="${sep=' ' sampleNames}" - if [ -n "$SAMPLES" ]; then SAMPLES="--samples $SAMPLES"; fi + echo "snpRefAccessions: $snpRefAccessions" intrahost.py merge_to_vcf \ ${reference_fasta} \ @@ -92,18 +62,18 @@ task isnvs_vcf_filtered { --isnvs ${sep=' ' vphaser2Calls} \ --alignments ${sep=' ' perSegmentMultiAlignments} \ --strip_chr_version \ - ${'--naive_filter' + naiveFilter} \ + ${true="--naive_filter" false="" naiveFilter} \ --parse_accession - + interhost.py snpEff \ isnvs.vcf.gz \ - ${sep=' ' snpEffRef} \ + $snpRefAccessions \ isnvs.annot.vcf.gz \ - ${emailAddress} + ${'--emailAddress=' + emailAddress} intrahost.py iSNV_table \ isnvs.annot.vcf.gz \ - isnvs.annot.txt.gz \ + isnvs.annot.txt.gz } output { diff --git a/pipes/rules/intrahost.rules b/pipes/rules/intrahost.rules index 97792728d..b2113edd9 100644 --- a/pipes/rules/intrahost.rules +++ b/pipes/rules/intrahost.rules @@ -56,7 +56,7 @@ rule isnvs_vcf: UGER = config.get('UGER_queues', {}).get('short', '-l h_rt=04:00:00'), logid = "all", snpEff_ref = " ".join(config["accessions_for_ref_genome_build"]), - email_address = config["email_point_of_contact_for_ncbi"] + email_address = "--emailAddress "+config["email_point_of_contact_for_ncbi"] if config["email_point_of_contact_for_ncbi"] else "" run: 
shell("{config[bin_dir]}/intrahost.py merge_to_vcf {input.ref_genome} {output.raw_vcf}" + " --samples " + " ".join(read_samples_file(config["samples_assembly"])) @@ -92,7 +92,7 @@ rule isnvs_vcf_filtered: UGER = config.get('UGER_queues', {}).get('short', '-l h_rt=04:00:00'), logid = "all", snpEff_ref = " ".join(config["accessions_for_ref_genome_build"]), - emailAddress = config["email_point_of_contact_for_ncbi"], + email_address = "--emailAddress "+config["email_point_of_contact_for_ncbi"] if config["email_point_of_contact_for_ncbi"] else "", naiveFilter = "--naive_filter" if config["vcf_merge_naive_filter"] else "" run: shell("{config[bin_dir]}/intrahost.py merge_to_vcf {input.ref_genome} {output.raw_vcf}" diff --git a/read_utils.py b/read_utils.py index ab638a61d..969c7bd38 100755 --- a/read_utils.py +++ b/read_utils.py @@ -754,11 +754,11 @@ def main_reheader_bam(args): ''' Copy a BAM file (inBam to outBam) while renaming elements of the BAM header. The mapping file specifies which (key, old value, new value) mappings. 
For example: - LB lib1 lib_one - SM sample1 Sample_1 - SM sample2 Sample_2 - SM sample3 Sample_3 - CN broad BI + LB lib1 lib_one + SM sample1 Sample_1 + SM sample2 Sample_2 + SM sample3 Sample_3 + CN broad BI ''' # read mapping file mapper = dict((a + ':' + b, a + ':' + c) for a, b, c in util.file.read_tabfile(args.rgMap)) diff --git a/requirements-conda.txt b/requirements-conda.txt index 0ac3ecd0c..a760dca8c 100644 --- a/requirements-conda.txt +++ b/requirements-conda.txt @@ -22,7 +22,7 @@ picard=2.18.9 pigz=2.4 prinseq=0.20.4 samtools=1.7 -snpeff=4.1l +snpeff=4.3.1t spades=3.11.1 tbl2asn=25.6 trimmomatic=0.38 diff --git a/test/__init__.py b/test/__init__.py index d9489fe4f..c85b8866d 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -118,4 +118,4 @@ def inputs(self, *fnames): def assert_none_executable(): testDir = os.path.dirname(__file__) assert all(not os.access(os.path.join(testDir, filename), os.X_OK) for filename in os.listdir(testDir) - if filename.endswith('.py')) + if filename.endswith('.py')) \ No newline at end of file diff --git a/test/input/TestSnpEff/RBV16.fasta b/test/input/TestSnpEff/RBV16.fasta new file mode 100644 index 000000000..f87504252 --- /dev/null +++ b/test/input/TestSnpEff/RBV16.fasta @@ -0,0 +1,200 @@ +>RBV16-1 +ACAAAATCAGAGAAGAAGTAGACAGTATCATCTACAAAAAGAAAATGTAACACCTCTACA +ATGGATACCGACAAAATTGTATTCAAAGTCAATAACCAGGTTGTCTCTCTAAAACCTGAA +ATTATTGTAGATCAATATGAGTACAAATACCCGGCTATCAAAGACTTAAAAAAGCCCAGT +ATCTCCCTGGGAAAAGCTCCTGATTTGAACAAGGCGTATAAGTCAATTTTGTCCGGTATG +AATGCAGCTAAGCTCGACCCTGACGATGTGTGCTCTTACTTGGCAGCTGCAATGCAGTTC +TTTGAAGGAACATGTCCAGAAGACTGGACTAGCTATGGAATCTTGATTGCAAGGAAAGGA +GACAAGATAACTCCAAACTCTCTCGTAGACATAAAACGTACAGATGTAGAAGGGAACTGG +GCTCTAACAGGAGGAATGGAGTTGACTAGGGATCCCACCATTCCAGAACATGCATCTTTG +GTTGGTCTTCTCCTGAGTTTGTATCGATTGAGCAAAATATCCGGACAGAACACAGGCAAT +TATAAGACAAATATCTCTGATCGTATAGAGCAGATTTTTGAGACGGCCCCCTTTGTGAAG +ATCGTGGAACATCACACTTTGATGACAACTCACAAAATGTGCGCTAACTGGAGCACCATA 
+CCGAACTTTAGATTCCTAGCCGGAACTTATGACATGTTTTTCTCTCGGATTGAACATCTA +TATTCAGCAATCAGAGTGGGTACAGTTGTCACTGCTTACGAGGACTGCTCAGGGCTAGTA +TCCTTTACAGGTTTTATAAAGCAGATAAACCTTACAGCAAAGGAAGCAATACTTTATTTC +TTCCACAAAAATTTTGAGGGAGAGATAAGAAGAATGTTTGAGCCGGGACAGGAAACTGCA +GTCCCTCACTCCTATTTCATCCATTTCCGGTCTTTGGGCCTTAGTGGGAAATCTCCATAT +TCGTCAAGTGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATGCTATATGGGT +CAAGTGAGGTCTTTGAATGCAACGGTTATTGCCACATGTGCTCCACATGAGATGTCTGTT +CTCGGGGGTTATTTGGGGGAGGAGTTCTTTGGCAAGGGGACTTTTGAGAGAAGATTCTTC +AGAAACGAGAAGGAACTTCAGGACTATGAAGCAGCAGAGTTGACAAAGACTGAGGTCGCT +CTGGCAGACGACGGAACAGTCAATTCTGACGATGAAGACTACTTCTCTGGTGAAACCAGG +AGTCCAGAGGCGGTCTATACTCGGATCATGATCAATGGGGGCCGACTCAAAAGATCACAT +ATAAGAAGGTATGTATCAGTCAGTTCCAATCATCAAGCTCGCCCCAATTCATTTGCTGAA +TTTCTAAACAAGACATATTCTAACGACCCGTAGGGAGTCGAACTTCAAGATTGTCAACAA +TAATAAATTGTTTAATTCCTCCACGAAAAAAACTAACACCCCTCCTTTTGAACCATCCCA +AGCATGAGCAAGATTTTTGTCAACCCAAGTGCGATCCGGGCCGGCCTTGCTGACCTAGAG +ATGGCAGAGGAGACTGTAGATCTGATTGCCAGGAACATCGAGGACAATCAGGCTCATCTC +CAAGGAGAGCCTATAGAAGTAGATAGTCTGCCCGATGACATGAGACGGCTCCACTTGGAT +AGTGAAAAACCGTCTGGTTTTGACAAGGTGACAAAAGAAGGGGAGAGCAAGTGTCATGAA +GACTTTCAGATGGATGAAGGGGAGGACCCCAGCCTCTTGTTCCAGTCATACCTGGATAAT +GTCGGAGTTCAAATAGTCAGGCAAATGAGGTCAGGAGAGAGATTCCTTAAAATATGGTCT +CAGACTGTGGAGGAGATCATATCCTATGTCATGATCAATTTCCCAGGCTCTTTGGGGAGG +CCTTCTGAAGACAAGGCCACTCAAACTGCCAATCGGGAACCCAAGAAAGGAGTGGCATCA +GTTTCGTCTCAACTCGAAGGTCAATCATCCAAAGCGAGAGCGGCAGCCCAAACTGCCTCC +GGTCCACCTGCCCTAGAATGGTCTGCCACCAACGAAGAAGACGATTTGTCTGTGGAGGCA +GAGATAGCTCATCAAATTGCCGAGAGCTTCTCCAAGAAGTACAAATTCCCCTCTCGATCA +TCAGGGATATTCTTGTATAATTTTGAGCAGTTAAAGATGAACCTTGATGACATAGTCAAG +GAATCAAAGAATGTGCCTGGTGTTACCCGCTTGGCCCATGAAGGGTCTAAACTCCCTCTA +AGATGTGTACTTGGGTGGGTTGCTCAAGCCAACTCCAGAAAGTTCCAGCTGCTGGTCGAG +CCTGACAAGCTAAATAAAATAATGCAAGACGACCTAAACCGTTATTTGTCTCGTCAACTA +AATTTGTAGCCTCAGTCCCATTATGCAATCAAAACCAGTCTGATCCAAAGTCAACGTGAA +AAAAACAGGCAACACCACTAATACAATGAATTTCCTACGCAAGATAGTGAAGAACTGTAG +AGATGAGGACGATCAGAAGCCTTCTCTCGCATCGGCCCCCCCAGATGATGATGATCTGTG 
+GTTGCCCCCTCCAGAATATGTCCCATTGAAGGAGCTCACAGGGAAGAAGAACATGAGAAA +CTTCTGTGTTAATGGGGAGGTCAAGGTGTGTAGTCCAAACGGATATTCCTTTAGGATCTT +GCGTCACATTCTGAAATCGTTCGATGAGATCTATTCCGGAAATCAAAGAATGATTGGGTT +AGTCAAAGTTGTGGTCGGATTGGCCTTGTCTGGAGCCCCAGTCCCCGAGGGCATGAACTG +GGTTTACAAACTAAGGAGAACTCTTATTTTCCAATGGGCAGACTCTAGGGGTCCTCTGGA +AGGGGAAGAGTTGGAGTACTCTCAGGAAATTACCTGGGACGATGACACTGAATTTGTAGG +GTTGCAAATAAGGGTGAGTGCTAGACAATGCCATATCCAAGGTAGAATCTGGTGCATTAA +CATGAACTCTAGAGCATGTCAATTGTGGTCTGATATGTCCCTTCAAACTCATAGATCTGA +GGAGGACAAAGACTCCTCAGTCCTTCTAGAGTAGTCGAATTATATCTCACAAGTTCCTCA +ATTGTCCACCTCTGGAGGAGAGAACACATGGGCTCAACTCCAACCTTCAGGAGCAATAGA +ACAAAAACATGTTATGGTGCCGTTGAATCGCTGCATTTTATCAGAGTCAAATCAATTACA +ATTGCACTTTAAGCCTCTTGGATGTGAAAAAAACTATTAACATCCCTCAAAAGACCTGAG +GAAAGATGGTCCCTTGGGCCCTTCTGTTTGTGCCTTTTCTGATCTCTTCATTGTGTTTTG +GGAAGTTCCCTATCTACACAATACCAGACAAGCTAGGTCCTTGGAGTCCTATCGACATAC +ATCATCTCAGTTGTCCCAATAATCTTGTTGTGGAAGACGAGGGGTGTAATAGTCTGTCGG +GGTTTTCTTACATGGAACTAAAAGTGGGATACATCTCTGCCATAAAAGTGAACGGGTTCA +CTTGTACTGGTGTCGTGACGGAAGCCGAGACCTACACTAACTTCGTCGGTTATGTCACCA +CCACATTCAAGAGAAAACACTTTCGCCCGATGCCCGATGCATGTAGATCCGCATACAATT +GGAAAATGGCAGGCGATCCTAGGTATGAAGAGTCCCTTCACAACCCATACCCTGATTATC +ATTGGCTGCGGACAGTTAAAACCACCAAGGAGTCTCTTATCATCATCTCGCCAAGTGTGG +CTGACCTAGACCCGTATGACAAATCCCTTCATTCAAGGATTTTTCCTGGGGGGAAATGCA +CGGGTCTAACAGTCTCTTCCACCTACTGCTCGACCAACCATGACTACACCATCTGGATGC +CTGAAAAAGCAGGGCTCGGGACATCTTGTGACATCTTCACCAATAGTAAAGGGAAGAGAG +CATCTAAAGGAGGCAAGACTTGTGGATTTGTGGACGAGAGAGGTTTGTATAAGTCCTTGA +AAGGAGCTTGTAAGCTCAAACTGTGCGGAGTTTCTGGGCTTAGACTTATGGATGGAACTT +GGGTTGCGATTCAGACATTGGATGAAACCAAATGGTGCTCTCCTGATCAACTGGTGAATC +TGCATGACTTCCACTCGGATGAGCTTGAGCATCTTGTTGTAGAGGAGTTGGTTAGAAAGA +GGGAGGAATGTCTGGATGCATTAGAATCCATCATGACCACCAAATCAGTAAGCTTCAGAC +GTCTAAGCCACCTGAGAAAACTAGTTCCTGGGTTTGGGAAGGCATACACCATATTCAACA +AAACCCTAATGGAGGCCGATGCCCACTACAAGTCCGTTCGGACTTGGAGCGAGATCATCC +CCTCAAAAGGGTGTTTGAGAGTAGGGGGGAGATGTCATCCTCATGTAAATGGAGTATTTT +TCAATGGCATCATTCTAGGTCCGGACGGGCATGTGTTGATCCCAGAAATGCAGTCATCCC 
+TTCTCCAACAACATGTGGAACTGTTGGAGTCCTCTGTGATCCCCCTCATGCACCCCCTGG +CAGATCCTTCAGCAGTTTTCAAAGATGGTGACGAGGCAGAGGATTTTGTTGAGGTTCATC +TCCCAGATGTTCACAAACAGATCTCAGGGGTGGACCTAGGCCTCCCAAGCTGGGGAAAGT +ATATGCTGATGATTGCAGGTGCTCTAACGACTCTAATGCTGTTCATCTTCTTGATGACAT +GTTGCAAAAGAGTCAACAGGACAAAGTCAATACGACAAGGCCCCAGAGAGATGGAAAGAA +AAGTATCATTTACTCCCCAAAACAAAAAAGTCGTATCTTCATGGGAGTCTTACAAGAGCG +GAGGCGAGACCAAGCTGTAAGGAGGGCTGCCCCCCCCACACACTATGCTCGGAAAACTGT +TCCCCTCTGATACATGGAAGAATGTCCTGCCTTGACAATCCCCCACGGACTCTGTCCAAC +AGGGTAAATTTCGAAGTCAAGAAACTTTCATCGATCATCTCACTCGACCAGACACATTCA +GGTGGACTTTGATGATGTATGAAGCATTTTTACAGTATCAGCGACTAATGGTGCTCTCAC +CCTCTAAGGACTGGTACTAAAGGTAGCGGACAGGCTGACTGACATCTCAGACAACCCTGT +TTCTTAGCTTGGGCAGAGGTTGTGATAAGCTCCTCTACCTTAAACTAAAACAGTGATCAG +CTGAGAAAACTGATTTGCCTCCTATGAAGGACACAAGCAATAGATCACAATCATCTCACA +TCCCAATAAGTTGTGCATAACTACAAAGGGCTGGGCCATCTAATCTCCTCAATCAAGAAA +AAAACTGTGAGACAGAGAAATCCTCAACAACACTTCTCATCCTGAACACTGCACCATGAT +GATTGATCCAGGGGAGGTTTATGATGACCCCATTGATCCAGTCGAGTCTGAGAGTGAGCC +AAGAGGAAACTCCAATATCCCCAACATCTTGCGAAATTCTGACTACAATCTCAACTCCCC +TCTGATCGAGGATTCAGCCAAGCTGATGTTAGAATGGCTGAAAACAGGAAATAGACCTCT +CCGGATGACCCTGACCGACAATTGTTCTCGGTCTTACAAAGTTCTGAAAGATTACCTCAA +GAAAGTGGATCTAGGATCTCTCAAAGTGGGCGGAGCTGCAGCACAATCTATGATCTCTCT +TTGGTTATATGGTGCTCACTCTGAATCAAATAGGAGCAGGAGGTGTATGACTGACTTAGC +TCAATTCTACTCCAAATCTTCCCCTATAGAGAAGCTGTTAAACTTCACACTCGGGAACAG +AGGGCTGAGAATCCCCCCTGAGGGAGTCTTAAGCTGTCTTGAGAGGGTGGATTACGATAA +GGCATTTGGGAGGTATTTGGCCAATATATATTCCTCCTATTTGTTCTTTCATGTGATTAC +CCTTTACATGAATGCCCTAGATTGGGATGAGGAGAAGACTATCTTGGCACTGTGGAGGGA +ATTGACATCAGTGGATATGGGAAAGGACTTGGTCAAGTTTAAAGATCAGATATGGGGACT +TCTGATTGTGACTAAAGACTTCATATATTCACAAAGCTCTCACTGTCTCTTTGACAGGAA +CTATACGCTTATGCTAAAAGACCTTTTTTTGTCCCGGTTCAACTCTCTGATGATTTTGCT +GTCCCCCCCGGAACCCAGATACTCAGATGACTTGATATCTCAGCTGTGTCAGCTGTACAT +AGCAGGAGATCAAGTGTTGTCCATGTGCGGAAACTCTGGTTATGAAGTCATAAAAATCTT +GGAGCCGTATGTTGTAAACAGCTTGGTCTATAGAGCGGAAAAATTCAGGCCTCTTATTCA +CTCTCTGGGGGACTTCCCTGTTTTTATAAAGGACAAGGTGAATCAACTTGAGGGGACGTT 
+TGGTCCTAGTGCGAAGAGATTCTTTAAAGTCTTGGATCAATTCGACAACATACATGATTT +GGTCTTTGTATACGGTTGTTACAGGCATTGGGGGCACCCTTACATAGACTATAGAAAGGG +TCTGTCAAAATTGTATGACCAGGTCCACATTAAAAAGGTGATAGATAAGTCCTATCAGGA +GTGTTTAGCGAGTGACCTGGCCAAAAGAATCCTCAGGTGGGGGTTCGACAAATACTCCAA +ATGGTATCTAGATCCACGACTTCTTGCGAGGGATCACCCCCTGACTCCTTATATCAAGAC +CCAGACATGGCCTCCCAAGCACATAGTGGATTTGGTAGGTGATACCTGGCATAGACTCCC +GATCACCCAGATCTTCGAGATCCCTGAATCAATGGACCCATCAGAGATATTAGATGACAA +ATCACACTCTTTCACTAGAACAAGGCTAGCGTCATGGCTTTCGGAAAACAGAGGAGGACC +GGTTCCAAGTGAGAAGGTCATTATTACGGCTCTCTCCAAACCGCCTGTCAACCCTAGAGA +GTTTTTGAAATCTATAGACCTTGGGGGGCTGCCAGACGAAGACTTAATCATCGGCTTGAA +ACCCAAAGAAAGGGAGCTGAAGATCGAAGGCCGATTTTTTGCTTTAATGTCTTGGAATTT +AAGGCTATACTTCGTTATTACAGAGAAGCTCCTGGCCAATTATATATTACCACTCTTTGA +TGCATTAACCATGACCGACAATTTGAACAAGGTGTTCAAAAAGTTGATTGATCGAGTCAC +CGGACAAGGGCTCTTAGATTATTCGAGGGTTACATATGCTTTTCACTTGGACTATGAGAA +GTGGAACAATCATCAGAGACTAGAATCGACTGAGGATGTATTCTCCGTCCTTGATCAAGT +GTTTGGATTAAAAAGGGTGTTCTCCAGAACTCATGAGTTTTTCCAGAAGTCTTGGATATA +TTACTCAGATAGATCTGATCTTATAGGATTGTGGGAGGACCAGATATACTGTTTGGACAT +GTCAAATGGCCCGACGTGCTGGAACGGCCAAGATGGCGGGTTAGAGGGGTTACGACAGAA +GGGCTGGAGTCTGGTCAGCCTACTGATGATAGATCGAGAGTCTCAGACCAGGAACACAAG +AACTAAGATACTAGCCCAGGGAGACAACCAAGTTCTATGTCCGACATACATGTTATCGCC +TGGGCTTTCTAGAGAGGGCCTCCTCTACGAGTTGGAGAGTATATCAAGAAACGCACTCTC +AATATATCGAGCCATCGAGGAAGGGGCATCTAAACTGGGGCTCATTATAAAGAAGGAGGA +GACAATGTGCAGCTATGACTTTCTCATTTATGGAAAAACTCCCTTATTTCGAGGTAACAT +TCTGGTGCCTGAATCCAAAAGGTGGGCTAGAGTCTCCTGTATCTCTAACGACCAAATAGT +CAACCTCGCTAATATAATGTCAACAGTGTCTACCAATGCTTTGACTGTTGCTCAACACTC +TCAGTCTTTGATTAAACCGATGAGGGACTTTCTGCTTATGTCAGTGCAAGCCGTTTTTCA +TTACTTGCTATTTAGCCCCATTTTAAAAGGCAGAGTTTACAAAATACTGAGTGCTGATGG +GGATCATTTTCTTTTAGCTATGTCTAGAATAGTTTACCTTGACCCCTCATTGGGGGGTGT +GTCTGGAATGTCTCTTGGGAGGTTTCACATACGCCAGTTTTCAGACCCGGTCTCTGAGGG +GTTATCTTTTTGGAGAGAGATTTGGTTAAGTTCAAATGAGTCTTGGATCCATGCACTTTG +TCAAGAAGCAGGAAATCCGGATTTGGGAGAGAGAACACTGGAAAGCTTCACACGCCTTCT +CGAGGATCCCACTACCCTGAATATAAAAGGAGGGGCCAGTCCAACCATTCTTCTCAAGGA 
+TGCAATCAGAAAGGCCCTGTATGATGAAGTGGATAAGGTGGAGAACTCCGAGTTTAGAGA +AGCAATCCTCTTGTCCAAGACCCATAGGGACAACTTTATACTTTTCTTGAGATCTGTTGA +GCCTCTCTTTCCTAGATTTCTCAGTGAACTCTTCAGCTCTTCCTTCCTGGGGATTCCTGA +ATCGATTATTGGGCTGATCCAAAATTCTAGAACGATAAGAAGACAGTTTAGGAAGAACCT +CTCTAGAACCTTAGAAGAGTCTTTCTATAACTCAGAGATTCATGGGATCAATCGGATGAC +ACAGACTCCCCAGAGAATAGGAAGAGTTTGGGCCTGCTCTTCTGAGAGGGCAGATCTCCT +AAGAGAAATCTCGTGGGGGAGAAAGGTGGTTGGTACAACCGTCCCTCACCCCTCTGAGAT +GTTGGGGTTGCTTCCTAAATCTTCAATCTCCTGTACTTGTGGCGCAACAGGAGGGGGAAA +TCCTCGAATATCAGTGTCTGTACTCCCGTCCTTTGATCAGTCGTTCTTTTCCAGAGGCCC +TCTGAAGGGATACCTGGGCTCATCCACATCTATGTCAACCCAGCTGTTCCATGCTTGGGA +GAAGGTCACCAATGTTCATGTGGTGAAAAGGGCCCTTTCACTCAAAGAGTCCATAAATTG +GTTCGTCACAAGAAACTCCAATTTGGCTCAAACTTTAATCAGGAATATAATGTCTCTGAC +GGGACCAGACTTCCCGCTGGAAGAGGCCCCCGTCTTTAAGAGAACAGGATCAGCTCTGCA +CAGGTTCAAGTCAGCTAGGTACAGTGAAGGGGGTTATTCCTCTGTCTGTCCCAATCTCCT +CTCTCATATTTCCGTCAGCACAGACACGATGTCTGACCTGACTCAAGATGGGAAGAACTA +TGACTTTATGTTCCAGCCGCTGATGCTTTACGCACAGACATGGACATCAGAACTGGTGCA +GAAGGACATACGGCTGAGAGACTCCACCTTTCACTGGCATCTTCGATGCAATAAGTGTAT +AAGGTCCATCGATGACATCACTCTAGAGACTTCCCAGATCTTTGAATTCCCGGATGTTTC +GAAAAGGATATCTAGGATGGTCTCTGGAGCAGTGCCTCACTTTCAAAAACTTCCTGATAT +TCGTCTAAGACCTGGAGACTTTGAGTCTCTAAGTGACAAAGAGAAGTCACGCCACATAGG +GTCGGCTCAGGGGCTCTTGTATTCGATCCTGGTTGCGATCCACGACTCTGGGTACAACGA +TGGAACCATTTTTCCTGTCAACATATATAGCAAGGTCTCCCCGAGAGACTATTTAAGAGG +TCTCGCCAGAGGAATCTTAATAGGGTCTTCTATCTGCTTTCTAACAAGAATGACAAACAT +CAACATCAACAGACCCCTTGAACTAATCTCAGGAGTGATCTCATACATCCTCCTTAGATT +AGACAATCACCCATCCTTATACATCATGCTCAGAGAACCGTCTCTGAGAGGAGAAATATT +TTCTATTCCCCAAAAGATCCCCGCCGCTTACCCAACCACAATGAAAGAGGGCAACAGATC +TATCCTGTGTTACCTCCAGCATGTGCTCCGCTATGAGCGGGAGGTAATTACAGCATCCCC +GGAGAATGATTGGTTATGGATCTTCTCTGACTTCAGAAGTTCCAAAATGACCTATTTAAC +CCTTATCACCTATCAGTCTCATCTTTTACTTCAGAGGGTTGAGAAGAATCTCTCTAAAAA +TATGAGGGCCAACCTACGACAGATGAGCTCTCTCATGAGACAAGTATTGGGCGGGCATGG +CGAAGACACCTTAGAATCAGACGAGGACATTCAGAGGTTGTTGAAAGACTCATTACGTAG +GACAAGATGGGTAGATCAAGAGGTGCGTCATGCAGCCAGAGCCATGACAGGGGGTTATAG 
+CCCCAATAAGAAGATGTCTCGCAAAGCGGGGTGTTCAGAATGGGTCTGCTCTGCCCAACA +GGTTGCTGTTTCAACCTCAGCAAATCCAGCCCCTGTCTCTGAGCTGGACATCAGGGCTCT +CTCCAAAAGACTTCAAAACCCGTTGATTTCAGGTCTGAGAGTTGTTCAGTGGGCAACAGG +CGCTCATTACAAACTCAAACCTATTCTTGATGATCTCAATGTGTTCCCATCCTTATGTCT +TGTGGTCGGAGACGGGTCAGGGGGGATCTCAAGGGCTGTACTCAATATGTTTCCGGACGC +TAGGCTCGTGTTCAACAGCCTGTTGGAAGTGAATGACCTGATGGCTTCAGGGACACATCC +GTTGCCTCCTTCAGCAATCATGAGCGGAGGGGATGATATCATATCTAGGGTGATTGACTT +TGACTCCATCTGGGAAAAGCCTTCTGACTTAAGGAACTTGACGACATGGAGGTACTTCCA +GTCGGTTCAAGAACAAGTGAATATGTCCTATGATCTTATTATTTGTGACGCAGAGGTCAC +TGACATCGCATCAATTAATCGAATAACTCTACTAATGTCTGATTTTGCATTATCAATAGA +CGGCCCACTTTATTTAGTTTTCAAAACTTATGGAACCATGCTCGTGAATCCTGATTACAG +AGCCGTTCAACATCTGTCCAGAGCATTTCCCACAGTCACAGGATTCATAACCCAGATGAC +ATCATCCTTCTCGTCTGAGCTATACCTCAGATTCTCTAAAAGGGGAAAATTTTTCCGAGA +TGCAGAATACTTGACTTCTTCTACTATTAGGGAGATGAGCCTTGTATTGTTCAACTGCAG +CAGCCCAAAAAGTGAGATGCAGAGGGCCCGCTCTCTGAATTACCAGGACCTTGTAAGAGG +ATTTCCTGAGGAGATCATATCCAACCCATATAATGAGATGATCATAACTCTGATTGACAG +TGATGTGGAATCTTTCCTGGTTCATAAGATGGTTGATGACCTAGAGTTGCAGCGAGGAAC +TTTGTCTAAAGTCTCCATTATCGTAGCCATCATGATAGTCTTTTCCAACAGGGTGTTTAA +TGTCTCGAAACCGTTGACTGACCCTTTATTCAATCCGCCATCTGACCCCAAAATCTTGAG +ACACTTCAATATTTGCTGCAGTACCATGATGTACTTGTCTACTGCTCTGGGGGATGTTCC +AAGCTTTGCTAGACTTCATGACCTGTATAACAGACCAATAACCTACTATTTCGGGAAGAA +AGTTATCCGAGGGAACATTTATTTATCCTGGAGTTGGTCTGACGACACTTCAGTGTTCAA +AAGGGTGGCTTGCAATTCTAGTTTGAGCCTCTCGGCTCACTGGATAAGGCTGATTTACAA +AATAGTGAAGACTACCAGACTTGTGGGAAATACGGAGAATCTATCCAGGGAGGTCGAAAA +GCACCTTCGAGGGTACAACAGGTGGATTACCCTAGACGACATAAAATCCAGATCATCTCT +TCTAGATTACAGCTGCCTATAAAACAGGGCATTTGGGGAGAAATACATGGACCAACACCT +TGAGACAGTATGCCCTGAAAAAAACAAGACCCTGATTCATAACCTCTATTTGCTTGATGG +TTTTTTTTGTCTTTGTTGTTTTTTTGTTAA diff --git a/test/input/TestSnpEff/ann_eff.txt.gz b/test/input/TestSnpEff/ann_eff.txt.gz new file mode 100644 index 000000000..8781203ef Binary files /dev/null and b/test/input/TestSnpEff/ann_eff.txt.gz differ diff --git a/test/input/TestSnpEff/ann_eff.vcf.gz 
b/test/input/TestSnpEff/ann_eff.vcf.gz new file mode 100644 index 000000000..19361c6b3 Binary files /dev/null and b/test/input/TestSnpEff/ann_eff.vcf.gz differ diff --git a/test/input/TestSnpEff/ann_eff.vcf.gz.tbi b/test/input/TestSnpEff/ann_eff.vcf.gz.tbi new file mode 100644 index 000000000..ad0058178 Binary files /dev/null and b/test/input/TestSnpEff/ann_eff.vcf.gz.tbi differ diff --git a/test/input/TestSnpEff/merged.vcf.gz b/test/input/TestSnpEff/merged.vcf.gz new file mode 100644 index 000000000..51f9a9fa9 Binary files /dev/null and b/test/input/TestSnpEff/merged.vcf.gz differ diff --git a/test/input/TestSnpEff/merged.vcf.gz.tbi b/test/input/TestSnpEff/merged.vcf.gz.tbi new file mode 100644 index 000000000..28d55b047 Binary files /dev/null and b/test/input/TestSnpEff/merged.vcf.gz.tbi differ diff --git a/test/input/TestSnpEff/msa.fasta b/test/input/TestSnpEff/msa.fasta new file mode 100644 index 000000000..eedd6deb8 --- /dev/null +++ b/test/input/TestSnpEff/msa.fasta @@ -0,0 +1,400 @@ +>JQ685920 +ACGCTTAACAACAAAATCAGAGAAGAAGTAGACAGTGTCGTCTACAAAGCAAGAATGTAA +CACCCCTACAATGGATGCCGACAAGATTGTGTTTAAAGTCAATAATCAGGTGGTCTCTCT +GAAGCCTGAAATTATAATAGATCAATATGAATACAAGTACCCGGCTATCAAAGATTTGAA +AAAGCCCAGTATAACCTTAGGAAAAGCCCCTGACTTGAACAAAGCATACAAGTCAATTCT +GTCCGGCATGAATGCAGCCAAGCTTGACCCTGATGATGTATGCTCTTATCTAGCAGCCGC +AATGCAGTTCTTTGAGGGGACATGTCCTGATGACTGGACCAGCTATGGAATCCTGATTGC +ACGGAAGGGAGACAAGATTACTCCAAATTCTCTTGTGGACATAAAACGTACTAATGTGGA +AGGGAACTGGGCTTTGACAGGGGGTATGGAGTTGACGAGAGACCCCACAGTTTCGGAGCA +TGCATCCCTAGTTGGTCTTCTCTTGAGTCTTTACAGGTTAAGCAAAATATCTGGACAAAA +CACCGGCAATTACAAAACGAACATCGCAGACAGAATAGAGCAGATTTTCGAGACGGCCCC +CTTTGTAAAGATCGTAGAGCATCATACCTTGATGACAACCCACAAAATGTGTGCTAACTG +GAGTACTATACCGAACTTCAGATTTCTGGCCGGAACCTACGATATGTTTTTCTCGCGGGT +TGAACATCTGTATTCAGCAATTAGAGTGGGTACAGTTGTAACAGCCTATGAGGACTGCTC +AGGGTTGGTGTCGTTTACAGGGTTTATTAAGCAAATAAATCTCACTGCAAAAGAAGCAAT +ACTATATTTCTTCCACAAGAACTTCGAGGAAGAGATAAGAAGAATGTTCGAGCCGGGGCA 
+AGAGACGGCAGTTCCTCACTCCTATTTCATACATTTTCGTTCTTTGGGCCTGAGTGGGAA +ATCTCCGTATTCATCAAATGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATG +TTATATGGGTCAAGTGAGATCTTTGAACGCAACGGTTATTGCTACATGTGCCCCGCATGA +GATGTCTGTTCTTGGGGGTTATTTGGGGGAGGAGTTTTTCGGAAAAGGGACTTTTGAGAG +AAGATTCTTTAGGGATGAGAAAGAACTTCAGGAATATGAGGCAGCTGAAGCAACGAAGAC +TGAAATAGCCTTGGCGGATGACGGAACAGTCAATTCTGATGATGAGGACTACTTCTCTGG +TGAAACTAGGAGCCCGGAGGCAGTTTATACTCGAATCATGATGAATGGGGGTAGATTAAA +AAGATCACACATAAGGAGATATGTCTCAGTCAGTTCCAATCATCAAGCTCGCCCTAATTC +GTTCGCTGAGTTTCTAAGTAAGACATACTCTAGTGATTCATAAAGAATTGAACAACAGGA +TTGTAAACATTAACAAATTGTGTACATCCTTCACGAAAAAAACTAACACCCCTCCTCTTG +AACCATCTCAGACATGAGCAAGATTTTTGTTAACCCAAGTGCAATCAGGGCAGGCTTGGC +TGATCTGGAGATGGCAGAGGAAACTGTAGATCTAGTTGCCAAGAACATCGAAGATAATCA +AGCTCATCTCCAAGGAGAACCCATAGAGGTGGACAGTCTTCCTGAAGACATGAGACGGCT +TCAGTTAGACGATGAAAAACCATCTGGCCTCGGTGTGATTGCCAAAGCAGGGGAGAGCAA +ATGTCAGGAAGACTTTCAGATGGACGAGGGGGAGGACCCCGCCCTCTTGTTCCAGTCGTA +CCTAGACAATATTGGAGTTCAAATGGTCAGACAAATGAGATCAGGAGAGAGGTTCCTCAA +GATATGGTCTCAGACTGTTGAGGAGATCATATCCTATGTCACGGTCAATTTCCCCAGTCT +GCCAAGGAAAGCCTCAGAAGACAAGGCTACCCAGACTGCCAACCAGGAACTCAAGAAAAA +GACAATGTCTGTTTCTTCTCAGCGAGAAAGTAAATCATCTAAAGCTAAAATGGCGGCCCA +AACCGCCTCCGGTCCTCCTGCTCTAGAATGGTCCGCCACAAACGAGGAGGATGACCTATC +TGTGGAGGCTGAAATTGCTCACCAGATTGCTGAGAGCTTCTCTAAGAAGTACAAATTCCC +CTCTCGATCATCAGGGATATTCTTGTATAATTTTGAGCAGTTGAAGATGAACCTTGATGA +CATCGTTAAGGAGTCGAAAAATGTGCCAAGCGTAACCCGCTTAGCCCATGACGGATCCAA +ACTCCCTCTGAGGTGTGTGCTGGGGTGGGTTGCTCTAGCCAACTCCAAAAAGTTCCAGCT +GTTAGTTGAGCCTGACAAGCTAAACAAAATAATGCAAGACGACCTGAATCGTTATGTGTT +CTGCTGACCAAACCCTCAAACTCAGTCGTGCTATGCGATCAAATCCAGCCTGCTCCAAAC +CCAACGTGAAAAAAACAGGCAACACCACTGATAAAATGAACTTTCTACGCAAGATAGTAA +AGAACTGTAGAGATGAGGACACTCAGAAGCCCTCTTTCGTGTCGGCTCCTCCAGATGATG +ATGACTTGTGGTTACCCCCTCCGGAATATGTTCCATTGAAGGAACTCTCGGGTAAGAAAA +ACATGAGAAACTTTTGTATAAATGGAGAGGTCAAAGTGTGTAGTCCGAATGGCTATTCAT +TTAGGATCCTGAGGCACATTTTGAAATCATTCGATGAGATCTATTCTGGAAATCAAAGAA +TGATAGGGTTAGTTAAAGTTGTTGTTGGACTTGCGTTGTCAGGAGCCCCAGTCCCGGAGG 
+GCATGAACTGGGTATACAAATTGAGGAGAACTCTTATCTTCCAGTGGGCTGATTCTAGGG +GCCCTCTAGAGGGGGAGGAGTTAGAATACTCTCAGGAAATTACCTGGGATGACGATGCCG +AATTTGTCGGATTGGAAATACGAGTGAGTGCAAGACAGTGCCATATCCAGGGCAGGATTT +GGTGTATCAACATGAACTCTAGGGCATGTCAACTATGGTCTGACATGTCCCTTCAAACAC +AAAGGTCTGAGGAGGATAAAGACTCTTCAATGCTTCTGGAATAGTCAATTTACATCCTAC +AAATTCCTCAATTGTTTACCTCTGGAGGAGAGAGCACATGGACTTAACTCCAACCTTCAG +GAGCAATAGAACAAAAACATGTTATGGTGCCGTTGAATTGCTGCATTTTATCATAGTCAA +ATCAATTATCTTTACATTTTAAGCCTCTCGGATGTGAAAAAAACTATCAACATCCCTCAA +AAGACTTAAGGAAACATGATCCCTCAGGCTCTTCTGTTTGTGCCTTTTCTATTTCCCTCG +TTGTGTCTCGGGAAATTCCCCATCTACACCATACCGGAAAAGCTCGGCCCTTGGAGTCCC +ATCGACATACATCATCTCAGCTGTCCTAACAATTTGGTTGTGGAGGACGAGGGTTGCGAC +AGTCTGTCAGGGTTTTCTTACATGGAACTGAAGGTGGGTTACATCTCTGCCATAAAGGTG +AATGGGTTCACTTGTACCGGTGTCGTGACAGAAGCTGAGACCTACACCAACTTTGTTGGT +TATGTCACCACCACGTTCAAGAGAAAACACTTCCGCCCTATGCCAGATGCATGCAGAGCT +GCGTACAACTGGAAGACAGCCGGTGACCCTAGATATGAGGAGTCTCTTCACAATCCTTAT +CCTGATTACCATTGGCTACGGACCGTGAAAACCACCAAAGAATCTCTTGTTATCATATCG +CCGAGTGTGGCTGATTTGGACCCATATGACAAATCCCTTCATTCAAGAGTTTTCCCTGGT +GGGAAATGTTTGGGGATAACAATCTCTTCCACCTACTGCTCAACTAACCATGACTATACT +ATCTGGATGCCCGAAGAAGCAAGACTCGGGACATCTTGTGACATTTTTACCAACAGCAAA +GGGAAGAGGGCATCTAAGGGGGGTAGGACTTGCGGATTCGTGGATGAAAGGGGCTTATAT +AAGTCTCTAAAAGGGGCATGTAAACTTAAGCTGTGCGGAGTTCCTGGACTTAGACTTATG +GATGGAACGTGGGTCGCTATTCAGACACCAGGTGAGACCAAATGGTGCTCTCCTGATCAG +CTGGTAAATCTACATGACTTTCGTTCAGATGAGATAGAACATCTCGTCGTGGAGGAGTTG +ATCAAGAAGAGAGAAGAATGTCTAGATGCACTAGAGTCCATCATGACCACCAAATCAGTA +AGCTTCAGACGTCTCAGCAACTTGAGAAAACTTGTCCCTGGGTTTGGAAAGGCATACACT +ATATTCAACAAAACCTTGATGGAGGCTGATGCTCACTACAAGTCAGTTCGGACTTGGGAT +GAAATCATTCCCTCAAAGGGGTGCCTAAGAGTCGGAGGGAGGTGTCATCCTCATGTAAAC +GGAGTGTTTTTCAATGGTATAATTCTGGGTCCGGATGGACATGTCCTGATTCCAGAGATG +CAATCGTCCCTCCTCCAACAGCATATGGAGCTGTTGGAATCCTCTGTAATCCCCCTAATA +CATCCCTTGGCCGACCCATCAACAGTCTTCAAAGACGGTGATGAAGCGGAGGACTTTGTT +GAGGTTCACCTTCCGGACGTTCACAAGCAGGTCTCAGGGGTCGATCTTGGTCTCCCAAAC +TGGGGGAAATATGTGCTGATGAGTGCAGGTGTTCTAGCGACCGTGATACTGACAATCTTC 
+TTGTTAACATGTTGCAGAAGGGTTAACAGAACAAAACCAAAACAACAAAGTCTTGGGGAG +TCAGGAAGGAAAGTATCGGTTACTCCTCAAAATGGGAAGGTCATGTCTTCATGGGAGTAT +TACAAGAGTGAGGGCAGGACCAGTCTGTGAGTGCTGGCCATCTCCTCCATATCTTGCGTT +CAGAAGATCACCTCTCTTCTAGATCTGGGGGAATCTCTTGTTTTGACAGTCCTTTGTGGA +CTCCGTGCTACAAGGCAAAATTCGAGAGTCAAGAAACTTTCATTAATCATCCCAACTGAT +CAGACACAGTTACGTAGGTTCTGATAATGTATGACGTCTTCTGACAGTGTCAGTGACCAA +TGGTGCTCTCATCCCCCATGGACTGATACCAAAGGTTGTGGACAAACCAACCGATATCTC +AGATAATTCTAGGCTTGAGCCGGGGCAGGGACCGTGGCTAGTCCCCCTACACTAGACT-A +AATAATGGTTAGCTGAGGGAAGCGATTTGCCTCCTATGAAGGACATAAGCAATAGATCAC +AATCATCTTACATCCCGATAAGGTGTGCTTAACTACAAAGGGCTGGGCCATCCAAGCTTT +TCAGCCAAGAAAAAAACTGTGGAATGGAGGAGTAATTAACAACACTTCTCATCCTGAGAA +CTGCACCATGATGCTTGATCCAGGGGAGGTTTATGATGACCCCGTTGATCCGATTGAGTC +AGAGGCCGAGCCGAGAGGGAACCCAACCATTCCCAACATCTTAAGAAACTCTGACTACAA +TCTCAACTCTCCTCTAATAGAGGATCCAGCCAAACTAATGTTAGAATGGTTGAAGACAGG +AAATAGGCCTCTCCGGATAACTTTAACAGACAATTGCTCTAGGTCTTACAAAATTTTGAA +GGATTATTTCAAGAAAGTGGATATAGGATCTATCAAAGTGGGCGGGGCTGCAGCACAATC +TATGATCTCCCTTTGGTTGCACGGTGCCCACTCTGAATCAAATAGGAGCCGGAAGTGTAT +AACCGACTTGGCTCAGTTCTATTCCAAGTCTTCCCCCATAGAAAAGCTGTTAAATTACAC +ACTCGGAAATCGAGGGCTGAGGATCCCCCCAGAGGGGGTCCTAAGTTGCCTTGAGAGGGT +CGATTACGATAAAGCATTTGGGAGGTATCTGGTTAACATATACTCCTCTTACTTATTCTT +TCACGTGATCACCCTTTACATGAACGCCTTGGACTGGGATGAGGAGAAGACCATTCTAGC +ACTGTGGAGGGATTTAACCTCAATAGATATAGGAAAGGACTTGGTCAAGTTTAAAGATCA +AATATGGGGATTGCTAATTGTGACCAAGGATTTTGTGTACTCACAAAGTTCTAACTGCCT +TTTTGATAGAAACTACACGCTTATGCTTAAAGACCTTTTTTTGTCTCGGTTCAACTCTCT +AATGATTCTCCTTTCTCCTCCGGAACCCAGATATTCAGACGACCTGATATCCCAGCTGTG +TCAGCTATATATCGCTGGAGATCATGTCTTGTCTATGTGCGGGAACTCTGGTTATGAGGT +CATTAAAATATTGGAGCCGTACGTTGTGAACAGTTTAGTCCAGAGGGCAGAAAAGTTTAG +GCCTCTCATTCATTCCCTAGGGGACTTCCCTGTATTTATAAGAGATAAGGTAGGTCAGCT +TGAAGGAACATTTGGTCCCAGTGCAAAAAGGTTCTTCAGGGTTCTGGATCAATTCGACAA +TATACACGACTTAGTCTTTGTATACGGCTGTTATAGGCATTGGGGGCATCCTTACATAGA +TTATAGAAAGGGCTTATCGAAGCTATATGATCAAGTCCACATCAAGAAGGTGATAGATAA +GACTTACCAGGAGTGTTTGGCCAGCGACCTGGCCAAAAGGATCCTCAGGTGGGGATTTGA 
+CAAGTATTCCAAATGGTATATTGATTCAAGACTCCTCTCAAAGGACCACCCCCTAACTCC +TTATATCAAAACCCAGACGTGGCCTCCAAAACATGTGGTAGATTTGGTGGGTGACACTTG +GCATAAGCTCCCGATCACCCAGATCTTCGAGATCCCCGAATCAATGGACCCATCTGAGAT +ACTAGATGATAAATCACACTCTTTTACTAGAACTAGACTAGCGTCCTGGCTATCAGAGAA +CAGAGGAGGGCCGGTCCCCAGCGAGAAGGTCATTATCACTGCTCTTTCCAAGCCTCCTGT +CAACCCCAGGGAATTTCTGAAATCTATAGACCTGGGAGGATTGCCGGACGAGGATTTGAT +AATCGGCCTCAAGCCTAAGGAAAGAGAGTTGAAGATAGAAGGTCGGTTTTTTGCCTTGAT +GTCCTGGAATCTAAGGCTGTATTTTGTCATCACAGAAAAGCTCCTAGCCAATTATATCTT +GCCACTTTTTGACGCACTGACTATGACAGACAACTTGAACAAAGTGTTTAAAAAGCTGAT +CGACAGAGTCACCGGACAGGGGCTTTTAGACTACTCCAGAGTTACATACGCTTTTCACCT +GGACTATGAAAAGTGGAACAATCATCAGAGGCTGGAGTCGACAGAGGATGTATTTTCTGT +ACTTGATCAAGTATTCGGATTAAAGAGGGTGTTTTCCAGAACTCATGAGTTTTTTCAGAA +GTCTTGGATCTATTACTCAGATAGATCCGACCTCATCGGGTTATGGGAAGATCAAATATA +CTGTTTGGACATGTCAAACGGCCCGACATGCTGGAACGGCCAGGATGGCGGGCTAGAGGG +TTTGCGACAGAAAGGCTGGAGTCTGGTTAGCCTATTAATGATAGATCGTGAATCTCAAAC +CAGGAACACAAGAACTAAAATACTAGCTCAGGGAGACAACCAAGTTCTGTGTCCGACATA +TATGCTGTCGCCGGGGCTCTCTCGAGAGGGGCTTCTCTACGAGTTGGAGAGCATATCAAG +AAACGCTCTCTCGATATATCGTGCCATCGAGGAAGGGGCATCCAAACTGGGGCTCATCAT +AAAGAAGGAAGAGACCATGTGTAGCTATGACTTTCTCATCTATGGGAAAACTCCTTTATT +TCGAGGTAACATCTTGGTGCCTGAGTCCAAAAGATGGGCTAGGGTCTCCTGTATCTCCAA +CGACCAGATAGTCAACCTTGCCAATATAATGTCAACTGTATCCACTAATGCATTGACTGT +CGCCCAACACTCTCAGTCTTTGATTAAACCGATGAGGGACTTCCTGCTTATGTCAGTACA +AGCTGTCTTCCATTACTTGCTGTTTAGCCCCATCTTGAAAGGCAGAGTTTACAAGATCCT +GGGTGCCGAGGGGGACAATTTTCTTCTAGCTATGTCTAGGATAATTTATTTAGACCCCTC +ATTGGGGGGAGTATCTGGAATGTCTCTTGGAAGGTTCCACATACGTCAGTTCTCAGATCC +TGTCTCAGAAGGGTTGTCCTTCTGGAGAGAGATATGGCTGAGCTCCAGTGAGTCTTGGGT +TCATGCACTGTGTCAAGAGGCAGGAAACCCAGATCTGGGAGAGAGAACACTGGAAAGCTT +CACTCGCCTGCTCGAGGATCCTACTACCCTAAACATTAAAGGAGGGGCCAGTCCTACCAT +TCTTCTCAAAGATGCGATCAGAAAAGCCTTGTATGATGAGGTAGACAAGGTGGAGAACTC +AGAGTTTAGGGAAGCGATCCTCTTGTCTAAGACTCATAGGGATAATTTCATACTCTTCTT +GAAATCTGTTGAGCCTTTGTTCCCCCGATTTCTCAGTGAGCTCTTCAGTTCATCATTCCT +GGGGATTCCCGAATCAATCATTGGGTTGATACAAAACTCCAGAACAATAAGAAGACAGTT 
+TAGGAGGAGTCTCTCAAGAACCTTGGAAGAGTCCTTTTATAACTCAGAGATCCATGGGAT +CAACCGGATGACCCAGACTCCCCAGAGGGTCGGGAGAGTTTGGCCCTGCTCTTCTGAGAG +GGCAGACCTCCTAAGAGAAATCTCATGGGGAAGGAAGGTGGTCGGCACAACAGTTCCTCA +CCCCTCTGAGATGTTGGGGTTGATTCCCAAATCCTCTATTTCCTGTACTTGTGGAGTAAC +AGGAGGGGGAAACCCTCGAATATCAGTGTCTGTGCTTCCATCTTTTGATCAGTCATTTTT +TTCGAGAGGCTCTCTGAAAGGATATCTGGGCTCATCCACTTCCATGTCGACCCAGCTATT +CCATGCCTGGGAGAAAGTCACCAATGTTCATGTGGTGAAAAGAGCTCTGTCACTCAAGGA +ATCTATAAACTGGTTCATTACAAGAGACTCAAATTTGGCCCAAACTTTGATTAGGAACAT +AATGTCTCTGACAGGCCCAGATTTTCCTCTGGAAGAGGCCCCTGTCTTCAAGAGGACAGG +GTCTGCCCTGCATAGGTTCAAGTCGGCTAGATACAGTGAAGGGGGGTACTCTTCAGTTTG +TCCAAATCTCCTCTCTCACATCTCTGTCAGTACAGACACGATGTCTGATTTGACTCATGA +TGGGATAAATTATGACTTTATGTTTCAGCCGTTGATGCTTTATGCGCAAACATGGACATC +AGAACTGGTGCAAAAAGATACACGGCTGAAAGATTCTACCTTTCACTGGCATCTTCGATG +TAACAAGTGCATAAGGCCCATCGATGATATCACCCTTGATACCTCTCAGATCTTCGAGTT +CCCAGATGTCTCAAGAAGGATATCTAGGATGGTTTCTGGAGCTGTGCCTCATTTCAGAAA +GCTTCCTGATATTCGTTTGAGACCAGGTGACTTCGAATCTCTAAGTGGTAAAGAGAAGTC +TCGCCACATAGGATCGGCCCAAGGACTCTTGTATTCAATCTTGGTTGCGATCCATGACTC +TGGGTATAATGATGGAACCATTTTCCCTGTCAACATATATAGCAAGGTCTCCCCTAGAGA +CTATTTGAGAGGGCTTGCGAGAGGAGTCTTGATAGGGTCCTCGATTTGCTTCTTGACAAG +AATGACAAACATTAACATCAATAGACCTCTTGAACTGATCTCAGGAGTGATATCATACAT +CCTCTTGAGACTGGACAATCACCCATCCTTGTATATAATGCTCAGAGAGCCATCCCTTAG +AGGAGAGATATTTTCTATTCCCCAAAAGGTCCCCGCTGCTTACCCGACCACGATGAAAGA +AGGTAACAGATCTGTTCTGTGCTACCTCCAACATGTGTTGCGCTACGAACGAGAGGTGAT +CACTGCATCTCCGGAGAATGACTGGTTGTGGATCTTCTCGGACTTTAGAAGCTCTAAGAT +GACATATCTGACTCTCATCACCTACCAGTCTCATCTGTTACTTCAGAAGGTTGAGAAGAA +TCTCTCCAAGAGTATGAGAGCCAACCTACGGCAAATGAGCTCTCTGATGAGGCAGGTGCT +GGGTGGCCATGGTGAAGACACCTTAGAGTCAGACGAAGACATCCAGAGGCTGTTAAGAGA +CTCTCTGCGTAGGACAAGGTGGGTAGACCAGGAAGTGCGCCATGCAGCGAGAACTATGAC +AGGAGTTTACAGCCCCACCAAGAAGATGTCACGCAAAGCCGGGTGTTCAGAATGGGTCTG +CTCTGCACAGCAGGTTGCGGTTTCGACCTCATCGAACCCAGCCCCTGTTTCAGAGCTGGA +CGTCAGAGCCCTCTCCAAAAGGCTTCAAAACCCGTTGATCTCTGGACTGAGAGTGGTTCA +GTGGGCGACAGGGGCCCATTATAAGCTCAAACCTATTCTGGATGATCTCAATGTCTTTCC 
+ATCTCTATGTCTTGTGGTCGGGGATGGGTCAGGGGGGATATCAAGAGCAGTACTTAACAT +GTTTCCTGATGCCAAACTCGTATTCAACAGCCTGTTAGAAGTAAATGACCTGATGGCATC +AGGAACACATCCGTTGCCCCCCTCAGCAATCATGAGTGGAGGAGATGACATCATATCCAG +GGTAATTGGCTTCGACTCCATCTGGGAGAAACCTTCTGACTTGAGGAACTTGACCACATG +GAGGTACTTCCAGTCAGTCCAAGAACAAGTAAATATGTCATACGACCTTATCATTTGTGA +TGCAGAGGTCACTGACATTGCATCAATCAACCGAATAACCCTGCTAATGTCTGATTTTGC +ATTGTCCATAGACGGCCCACTTTATCTGGTTTTCAAAACTTACGGTACCATGCTTGTAAA +CCCTGACTACAGAGCAATTCAACATCTATCCAGAGCATTTCCTGCGGTCACTGGATTCAT +AACTCAGATGACCTCGTCCTTCTCATCCGAGCTATATCTCAGATTCTCAAAGCGGGGGAA +GTTCTTCCGGGATGCAGAGTATTTGACTTCTTCCACCCTTCGAGAAATGAGCCTTGTATT +GTTCAACTGTAGCAGCCCCAAGAGTGAGATGCAGAGAGCCCGCTCTCTAAATTACCAAGA +TCTTGTAAGAGGATTCCCAGATGAGATCATATCCAATCCTTACAGCGAAATGATCATAAC +TCTGATTGACAGTGATGTAGAATCTTTCCTGGTTCACAAAATGGTAGATGATCTAGAGTT +GCAGCGAGGAACTTTGTCTAAAGTTTCTATCATTATAGCCATCATGATAGTCTTCTCCAA +TAGGGTGTTCAATGTCTCAAAACCACTGACTGACCCTTTATTCTATCCGCCATCTGATCC +CAAGATCTTGAGACACTTCAACATATGCTGCGGCACCATGATGTACTTGTCTACCGCCTT +AGGGGATGTGCCCAGCTTCTCGAGACTTCATGACCTGTACAACAGACCCATAACTTATTA +TTTTAGGAAGCAAGTCATCCGGGGGAGTGTTTACCTGTCCTGGAGTTGGTCTGATGACAC +TTCAGTGTTTAAAAGGGTGGCTTGCAACTCTAGCTTGAGTCTCTCATCTCACTGGATCAG +GTTGATTTACAAAATAGTGAAGACCACCAGACTTGTAGGAAGGGTCGAGGACCTGTCCAG +GGAGGTAGAGAGGCACCTTCGGGGGTACAACAGATGGATCACCCTCGATGACATTAGATC +CCGATCATCTCTGTTAGATTATAGCTGCTTGTAAGACTAAGCACTCTCGAGGGAATGTAC +AGACTAAGATTATGGGACGGTGTAACCTGAAAAAAACAAGATCCCGATTCATAACTTCTG +TTTACTTGATTG-TTTTTCCATCTTTATTGTTTTTTTGTTAAGCGT +>RBV16-1 +----------ACAAAATCAGAGAAGAAGTAGACAGTATCATCTACAAAAAGAAAATGTAA +CACCTCTACAATGGATACCGACAAAATTGTATTCAAAGTCAATAACCAGGTTGTCTCTCT +AAAACCTGAAATTATTGTAGATCAATATGAGTACAAATACCCGGCTATCAAAGACTTAAA +AAAGCCCAGTATCTCCCTGGGAAAAGCTCCTGATTTGAACAAGGCGTATAAGTCAATTTT +GTCCGGTATGAATGCAGCTAAGCTCGACCCTGACGATGTGTGCTCTTACTTGGCAGCTGC +AATGCAGTTCTTTGAAGGAACATGTCCAGAAGACTGGACTAGCTATGGAATCTTGATTGC +AAGGAAAGGAGACAAGATAACTCCAAACTCTCTCGTAGACATAAAACGTACAGATGTAGA +AGGGAACTGGGCTCTAACAGGAGGAATGGAGTTGACTAGGGATCCCACCATTCCAGAACA 
+TGCATCTTTGGTTGGTCTTCTCCTGAGTTTGTATCGATTGAGCAAAATATCCGGACAGAA +CACAGGCAATTATAAGACAAATATCTCTGATCGTATAGAGCAGATTTTTGAGACGGCCCC +CTTTGTGAAGATCGTGGAACATCACACTTTGATGACAACTCACAAAATGTGCGCTAACTG +GAGCACCATACCGAACTTTAGATTCCTAGCCGGAACTTATGACATGTTTTTCTCTCGGAT +TGAACATCTATATTCAGCAATCAGAGTGGGTACAGTTGTCACTGCTTACGAGGACTGCTC +AGGGCTAGTATCCTTTACAGGTTTTATAAAGCAGATAAACCTTACAGCAAAGGAAGCAAT +ACTTTATTTCTTCCACAAAAATTTTGAGGGAGAGATAAGAAGAATGTTTGAGCCGGGACA +GGAAACTGCAGTCCCTCACTCCTATTTCATCCATTTCCGGTCTTTGGGCCTTAGTGGGAA +ATCTCCATATTCGTCAAGTGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATG +CTATATGGGTCAAGTGAGGTCTTTGAATGCAACGGTTATTGCCACATGTGCTCCACATGA +GATGTCTGTTCTCGGGGGTTATTTGGGGGAGGAGTTCTTTGGCAAGGGGACTTTTGAGAG +AAGATTCTTCAGAAACGAGAAGGAACTTCAGGACTATGAAGCAGCAGAGTTGACAAAGAC +TGAGGTCGCTCTGGCAGACGACGGAACAGTCAATTCTGACGATGAAGACTACTTCTCTGG +TGAAACCAGGAGTCCAGAGGCGGTCTATACTCGGATCATGATCAATGGGGGCCGACTCAA +AAGATCACATATAAGAAGGTATGTATCAGTCAGTTCCAATCATCAAGCTCGCCCCAATTC +ATTTGCTGAATTTCTAAACAAGACATATTCTAACGACCCGTAGGGAGTCGAACTTCAAGA +TTGTCAACAATAATAAATTGTTTAATTCCTCCACGAAAAAAACTAACACCCCTCCTTTTG +AACCATCCCAAGCATGAGCAAGATTTTTGTCAACCCAAGTGCGATCCGGGCCGGCCTTGC +TGACCTAGAGATGGCAGAGGAGACTGTAGATCTGATTGCCAGGAACATCGAGGACAATCA +GGCTCATCTCCAAGGAGAGCCTATAGAAGTAGATAGTCTGCCCGATGACATGAGACGGCT +CCACTTGGATAGTGAAAAACCGTCTGGTTTTGACAAGGTGACAAAAGAAGGGGAGAGCAA +GTGTCATGAAGACTTTCAGATGGATGAAGGGGAGGACCCCAGCCTCTTGTTCCAGTCATA +CCTGGATAATGTCGGAGTTCAAATAGTCAGGCAAATGAGGTCAGGAGAGAGATTCCTTAA +AATATGGTCTCAGACTGTGGAGGAGATCATATCCTATGTCATGATCAATTTCCCAGGCTC +TTTGGGGAGGCCTTCTGAAGACAAGGCCACTCAAACTGCCAATCGGGAACCCAAGAAAGG +AGTGGCATCAGTTTCGTCTCAACTCGAAGGTCAATCATCCAAAGCGAGAGCGGCAGCCCA +AACTGCCTCCGGTCCACCTGCCCTAGAATGGTCTGCCACCAACGAAGAAGACGATTTGTC +TGTGGAGGCAGAGATAGCTCATCAAATTGCCGAGAGCTTCTCCAAGAAGTACAAATTCCC +CTCTCGATCATCAGGGATATTCTTGTATAATTTTGAGCAGTTAAAGATGAACCTTGATGA +CATAGTCAAGGAATCAAAGAATGTGCCTGGTGTTACCCGCTTGGCCCATGAAGGGTCTAA +ACTCCCTCTAAGATGTGTACTTGGGTGGGTTGCTCAAGCCAACTCCAGAAAGTTCCAGCT +GCTGGTCGAGCCTGACAAGCTAAATAAAATAATGCAAGACGACCTAAACCGTTATTTGTC 
+TCGTCAACTAAATTTGTAGCCTCAGTCCCATTATGCAATCAAAACCAGTCTGATCCAAAG +TCAACGTGAAAAAAACAGGCAACACCACTAATACAATGAATTTCCTACGCAAGATAGTGA +AGAACTGTAGAGATGAGGACGATCAGAAGCCTTCTCTCGCATCGGCCCCCCCAGATGATG +ATGATCTGTGGTTGCCCCCTCCAGAATATGTCCCATTGAAGGAGCTCACAGGGAAGAAGA +ACATGAGAAACTTCTGTGTTAATGGGGAGGTCAAGGTGTGTAGTCCAAACGGATATTCCT +TTAGGATCTTGCGTCACATTCTGAAATCGTTCGATGAGATCTATTCCGGAAATCAAAGAA +TGATTGGGTTAGTCAAAGTTGTGGTCGGATTGGCCTTGTCTGGAGCCCCAGTCCCCGAGG +GCATGAACTGGGTTTACAAACTAAGGAGAACTCTTATTTTCCAATGGGCAGACTCTAGGG +GTCCTCTGGAAGGGGAAGAGTTGGAGTACTCTCAGGAAATTACCTGGGACGATGACACTG +AATTTGTAGGGTTGCAAATAAGGGTGAGTGCTAGACAATGCCATATCCAAGGTAGAATCT +GGTGCATTAACATGAACTCTAGAGCATGTCAATTGTGGTCTGATATGTCCCTTCAAACTC +ATAGATCTGAGGAGGACAAAGACTCCTCAGTCCTTCTAGAGTAGTCGAATTATATCTCAC +AAGTTCCTCAATTGTCCACCTCTGGAGGAGAGAACACATGGGCTCAACTCCAACCTTCAG +GAGCAATAGAACAAAAACATGTTATGGTGCCGTTGAATCGCTGCATTTTATCAGAGTCAA +ATCAATTACAATTGCACTTTAAGCCTCTTGGATGTGAAAAAAACTATTAACATCCCTCAA +AAGACCTGAGGAAAGATGGTCCCTTGGGCCCTTCTGTTTGTGCCTTTTCTGATCTCTTCA +TTGTGTTTTGGGAAGTTCCCTATCTACACAATACCAGACAAGCTAGGTCCTTGGAGTCCT +ATCGACATACATCATCTCAGTTGTCCCAATAATCTTGTTGTGGAAGACGAGGGGTGTAAT +AGTCTGTCGGGGTTTTCTTACATGGAACTAAAAGTGGGATACATCTCTGCCATAAAAGTG +AACGGGTTCACTTGTACTGGTGTCGTGACGGAAGCCGAGACCTACACTAACTTCGTCGGT +TATGTCACCACCACATTCAAGAGAAAACACTTTCGCCCGATGCCCGATGCATGTAGATCC +GCATACAATTGGAAAATGGCAGGCGATCCTAGGTATGAAGAGTCCCTTCACAACCCATAC +CCTGATTATCATTGGCTGCGGACAGTTAAAACCACCAAGGAGTCTCTTATCATCATCTCG +CCAAGTGTGGCTGACCTAGACCCGTATGACAAATCCCTTCATTCAAGGATTTTTCCTGGG +GGGAAATGCACGGGTCTAACAGTCTCTTCCACCTACTGCTCGACCAACCATGACTACACC +ATCTGGATGCCTGAAAAAGCAGGGCTCGGGACATCTTGTGACATCTTCACCAATAGTAAA +GGGAAGAGAGCATCTAAAGGAGGCAAGACTTGTGGATTTGTGGACGAGAGAGGTTTGTAT +AAGTCCTTGAAAGGAGCTTGTAAGCTCAAACTGTGCGGAGTTTCTGGGCTTAGACTTATG +GATGGAACTTGGGTTGCGATTCAGACATTGGATGAAACCAAATGGTGCTCTCCTGATCAA +CTGGTGAATCTGCATGACTTCCACTCGGATGAGCTTGAGCATCTTGTTGTAGAGGAGTTG +GTTAGAAAGAGGGAGGAATGTCTGGATGCATTAGAATCCATCATGACCACCAAATCAGTA +AGCTTCAGACGTCTAAGCCACCTGAGAAAACTAGTTCCTGGGTTTGGGAAGGCATACACC 
+ATATTCAACAAAACCCTAATGGAGGCCGATGCCCACTACAAGTCCGTTCGGACTTGGAGC +GAGATCATCCCCTCAAAAGGGTGTTTGAGAGTAGGGGGGAGATGTCATCCTCATGTAAAT +GGAGTATTTTTCAATGGCATCATTCTAGGTCCGGACGGGCATGTGTTGATCCCAGAAATG +CAGTCATCCCTTCTCCAACAACATGTGGAACTGTTGGAGTCCTCTGTGATCCCCCTCATG +CACCCCCTGGCAGATCCTTCAGCAGTTTTCAAAGATGGTGACGAGGCAGAGGATTTTGTT +GAGGTTCATCTCCCAGATGTTCACAAACAGATCTCAGGGGTGGACCTAGGCCTCCCAAGC +TGGGGAAAGTATATGCTGATGATTGCAGGTGCTCTAACGACTCTAATGCTGTTCATCTTC +TTGATGACATGTTGCAAAAGAGTCAACAGGACAAAGTCAATACGACAAGGCCCCAGAGAG +ATGGAAAGAAAAGTATCATTTACTCCCCAAAACAAAAAAGTCGTATCTTCATGGGAGTCT +TACAAGAGCGGAGGCGAGACCAAGCTGTAAG-GAGGGCTGCCCCCCCCACACACTATGCT +CGGAAAACTGTTCCCCTCTGATACATGGAAGAATGTCCTGCCTTGACAATCCCCCACGGA +CTCTGTCCAACAGGGTAAATTTCGA-AGTCAAGAAACTTTCATCGATCATCTCACTCGAC +CAGACACATTCAGGTGGACTTTGATGATGTATGAAGCATTTTTACAGTATCAGCGACTAA +TGGTGCTCTCACCCTCTAAGGACTGGTACTAAAGGTAGCGGACAGGCTGACTGACATCTC +AGACAACCCTGTTTCTTAGCTTGGGCAGAGGTTGTGATAAGCTCCTCTACCTTAAACTAA +AACAGTGATCAGCTGAGAAAACTGATTTGCCTCCTATGAAGGACACAAGCAATAGATCAC +AATCATCTCACATCCCAATAAGTTGTGCATAACTACAAAGGGCTGGGCCATCTAATCTCC +TCAATCAAGAAAAAAACTGTGAGACAGAGAAATCCTCAACAACACTTCTCATCCTGAACA +CTGCACCATGATGATTGATCCAGGGGAGGTTTATGATGACCCCATTGATCCAGTCGAGTC +TGAGAGTGAGCCAAGAGGAAACTCCAATATCCCCAACATCTTGCGAAATTCTGACTACAA +TCTCAACTCCCCTCTGATCGAGGATTCAGCCAAGCTGATGTTAGAATGGCTGAAAACAGG +AAATAGACCTCTCCGGATGACCCTGACCGACAATTGTTCTCGGTCTTACAAAGTTCTGAA +AGATTACCTCAAGAAAGTGGATCTAGGATCTCTCAAAGTGGGCGGAGCTGCAGCACAATC +TATGATCTCTCTTTGGTTATATGGTGCTCACTCTGAATCAAATAGGAGCAGGAGGTGTAT +GACTGACTTAGCTCAATTCTACTCCAAATCTTCCCCTATAGAGAAGCTGTTAAACTTCAC +ACTCGGGAACAGAGGGCTGAGAATCCCCCCTGAGGGAGTCTTAAGCTGTCTTGAGAGGGT +GGATTACGATAAGGCATTTGGGAGGTATTTGGCCAATATATATTCCTCCTATTTGTTCTT +TCATGTGATTACCCTTTACATGAATGCCCTAGATTGGGATGAGGAGAAGACTATCTTGGC +ACTGTGGAGGGAATTGACATCAGTGGATATGGGAAAGGACTTGGTCAAGTTTAAAGATCA +GATATGGGGACTTCTGATTGTGACTAAAGACTTCATATATTCACAAAGCTCTCACTGTCT +CTTTGACAGGAACTATACGCTTATGCTAAAAGACCTTTTTTTGTCCCGGTTCAACTCTCT +GATGATTTTGCTGTCCCCCCCGGAACCCAGATACTCAGATGACTTGATATCTCAGCTGTG 
+TCAGCTGTACATAGCAGGAGATCAAGTGTTGTCCATGTGCGGAAACTCTGGTTATGAAGT +CATAAAAATCTTGGAGCCGTATGTTGTAAACAGCTTGGTCTATAGAGCGGAAAAATTCAG +GCCTCTTATTCACTCTCTGGGGGACTTCCCTGTTTTTATAAAGGACAAGGTGAATCAACT +TGAGGGGACGTTTGGTCCTAGTGCGAAGAGATTCTTTAAAGTCTTGGATCAATTCGACAA +CATACATGATTTGGTCTTTGTATACGGTTGTTACAGGCATTGGGGGCACCCTTACATAGA +CTATAGAAAGGGTCTGTCAAAATTGTATGACCAGGTCCACATTAAAAAGGTGATAGATAA +GTCCTATCAGGAGTGTTTAGCGAGTGACCTGGCCAAAAGAATCCTCAGGTGGGGGTTCGA +CAAATACTCCAAATGGTATCTAGATCCACGACTTCTTGCGAGGGATCACCCCCTGACTCC +TTATATCAAGACCCAGACATGGCCTCCCAAGCACATAGTGGATTTGGTAGGTGATACCTG +GCATAGACTCCCGATCACCCAGATCTTCGAGATCCCTGAATCAATGGACCCATCAGAGAT +ATTAGATGACAAATCACACTCTTTCACTAGAACAAGGCTAGCGTCATGGCTTTCGGAAAA +CAGAGGAGGACCGGTTCCAAGTGAGAAGGTCATTATTACGGCTCTCTCCAAACCGCCTGT +CAACCCTAGAGAGTTTTTGAAATCTATAGACCTTGGGGGGCTGCCAGACGAAGACTTAAT +CATCGGCTTGAAACCCAAAGAAAGGGAGCTGAAGATCGAAGGCCGATTTTTTGCTTTAAT +GTCTTGGAATTTAAGGCTATACTTCGTTATTACAGAGAAGCTCCTGGCCAATTATATATT +ACCACTCTTTGATGCATTAACCATGACCGACAATTTGAACAAGGTGTTCAAAAAGTTGAT +TGATCGAGTCACCGGACAAGGGCTCTTAGATTATTCGAGGGTTACATATGCTTTTCACTT +GGACTATGAGAAGTGGAACAATCATCAGAGACTAGAATCGACTGAGGATGTATTCTCCGT +CCTTGATCAAGTGTTTGGATTAAAAAGGGTGTTCTCCAGAACTCATGAGTTTTTCCAGAA +GTCTTGGATATATTACTCAGATAGATCTGATCTTATAGGATTGTGGGAGGACCAGATATA +CTGTTTGGACATGTCAAATGGCCCGACGTGCTGGAACGGCCAAGATGGCGGGTTAGAGGG +GTTACGACAGAAGGGCTGGAGTCTGGTCAGCCTACTGATGATAGATCGAGAGTCTCAGAC +CAGGAACACAAGAACTAAGATACTAGCCCAGGGAGACAACCAAGTTCTATGTCCGACATA +CATGTTATCGCCTGGGCTTTCTAGAGAGGGCCTCCTCTACGAGTTGGAGAGTATATCAAG +AAACGCACTCTCAATATATCGAGCCATCGAGGAAGGGGCATCTAAACTGGGGCTCATTAT +AAAGAAGGAGGAGACAATGTGCAGCTATGACTTTCTCATTTATGGAAAAACTCCCTTATT +TCGAGGTAACATTCTGGTGCCTGAATCCAAAAGGTGGGCTAGAGTCTCCTGTATCTCTAA +CGACCAAATAGTCAACCTCGCTAATATAATGTCAACAGTGTCTACCAATGCTTTGACTGT +TGCTCAACACTCTCAGTCTTTGATTAAACCGATGAGGGACTTTCTGCTTATGTCAGTGCA +AGCCGTTTTTCATTACTTGCTATTTAGCCCCATTTTAAAAGGCAGAGTTTACAAAATACT +GAGTGCTGATGGGGATCATTTTCTTTTAGCTATGTCTAGAATAGTTTACCTTGACCCCTC +ATTGGGGGGTGTGTCTGGAATGTCTCTTGGGAGGTTTCACATACGCCAGTTTTCAGACCC 
+GGTCTCTGAGGGGTTATCTTTTTGGAGAGAGATTTGGTTAAGTTCAAATGAGTCTTGGAT +CCATGCACTTTGTCAAGAAGCAGGAAATCCGGATTTGGGAGAGAGAACACTGGAAAGCTT +CACACGCCTTCTCGAGGATCCCACTACCCTGAATATAAAAGGAGGGGCCAGTCCAACCAT +TCTTCTCAAGGATGCAATCAGAAAGGCCCTGTATGATGAAGTGGATAAGGTGGAGAACTC +CGAGTTTAGAGAAGCAATCCTCTTGTCCAAGACCCATAGGGACAACTTTATACTTTTCTT +GAGATCTGTTGAGCCTCTCTTTCCTAGATTTCTCAGTGAACTCTTCAGCTCTTCCTTCCT +GGGGATTCCTGAATCGATTATTGGGCTGATCCAAAATTCTAGAACGATAAGAAGACAGTT +TAGGAAGAACCTCTCTAGAACCTTAGAAGAGTCTTTCTATAACTCAGAGATTCATGGGAT +CAATCGGATGACACAGACTCCCCAGAGAATAGGAAGAGTTTGGGCCTGCTCTTCTGAGAG +GGCAGATCTCCTAAGAGAAATCTCGTGGGGGAGAAAGGTGGTTGGTACAACCGTCCCTCA +CCCCTCTGAGATGTTGGGGTTGCTTCCTAAATCTTCAATCTCCTGTACTTGTGGCGCAAC +AGGAGGGGGAAATCCTCGAATATCAGTGTCTGTACTCCCGTCCTTTGATCAGTCGTTCTT +TTCCAGAGGCCCTCTGAAGGGATACCTGGGCTCATCCACATCTATGTCAACCCAGCTGTT +CCATGCTTGGGAGAAGGTCACCAATGTTCATGTGGTGAAAAGGGCCCTTTCACTCAAAGA +GTCCATAAATTGGTTCGTCACAAGAAACTCCAATTTGGCTCAAACTTTAATCAGGAATAT +AATGTCTCTGACGGGACCAGACTTCCCGCTGGAAGAGGCCCCCGTCTTTAAGAGAACAGG +ATCAGCTCTGCACAGGTTCAAGTCAGCTAGGTACAGTGAAGGGGGTTATTCCTCTGTCTG +TCCCAATCTCCTCTCTCATATTTCCGTCAGCACAGACACGATGTCTGACCTGACTCAAGA +TGGGAAGAACTATGACTTTATGTTCCAGCCGCTGATGCTTTACGCACAGACATGGACATC +AGAACTGGTGCAGAAGGACATACGGCTGAGAGACTCCACCTTTCACTGGCATCTTCGATG +CAATAAGTGTATAAGGTCCATCGATGACATCACTCTAGAGACTTCCCAGATCTTTGAATT +CCCGGATGTTTCGAAAAGGATATCTAGGATGGTCTCTGGAGCAGTGCCTCACTTTCAAAA +ACTTCCTGATATTCGTCTAAGACCTGGAGACTTTGAGTCTCTAAGTGACAAAGAGAAGTC +ACGCCACATAGGGTCGGCTCAGGGGCTCTTGTATTCGATCCTGGTTGCGATCCACGACTC +TGGGTACAACGATGGAACCATTTTTCCTGTCAACATATATAGCAAGGTCTCCCCGAGAGA +CTATTTAAGAGGTCTCGCCAGAGGAATCTTAATAGGGTCTTCTATCTGCTTTCTAACAAG +AATGACAAACATCAACATCAACAGACCCCTTGAACTAATCTCAGGAGTGATCTCATACAT +CCTCCTTAGATTAGACAATCACCCATCCTTATACATCATGCTCAGAGAACCGTCTCTGAG +AGGAGAAATATTTTCTATTCCCCAAAAGATCCCCGCCGCTTACCCAACCACAATGAAAGA +GGGCAACAGATCTATCCTGTGTTACCTCCAGCATGTGCTCCGCTATGAGCGGGAGGTAAT +TACAGCATCCCCGGAGAATGATTGGTTATGGATCTTCTCTGACTTCAGAAGTTCCAAAAT +GACCTATTTAACCCTTATCACCTATCAGTCTCATCTTTTACTTCAGAGGGTTGAGAAGAA 
+TCTCTCTAAAAATATGAGGGCCAACCTACGACAGATGAGCTCTCTCATGAGACAAGTATT +GGGCGGGCATGGCGAAGACACCTTAGAATCAGACGAGGACATTCAGAGGTTGTTGAAAGA +CTCATTACGTAGGACAAGATGGGTAGATCAAGAGGTGCGTCATGCAGCCAGAGCCATGAC +AGGGGGTTATAGCCCCAATAAGAAGATGTCTCGCAAAGCGGGGTGTTCAGAATGGGTCTG +CTCTGCCCAACAGGTTGCTGTTTCAACCTCAGCAAATCCAGCCCCTGTCTCTGAGCTGGA +CATCAGGGCTCTCTCCAAAAGACTTCAAAACCCGTTGATTTCAGGTCTGAGAGTTGTTCA +GTGGGCAACAGGCGCTCATTACAAACTCAAACCTATTCTTGATGATCTCAATGTGTTCCC +ATCCTTATGTCTTGTGGTCGGAGACGGGTCAGGGGGGATCTCAAGGGCTGTACTCAATAT +GTTTCCGGACGCTAGGCTCGTGTTCAACAGCCTGTTGGAAGTGAATGACCTGATGGCTTC +AGGGACACATCCGTTGCCTCCTTCAGCAATCATGAGCGGAGGGGATGATATCATATCTAG +GGTGATTGACTTTGACTCCATCTGGGAAAAGCCTTCTGACTTAAGGAACTTGACGACATG +GAGGTACTTCCAGTCGGTTCAAGAACAAGTGAATATGTCCTATGATCTTATTATTTGTGA +CGCAGAGGTCACTGACATCGCATCAATTAATCGAATAACTCTACTAATGTCTGATTTTGC +ATTATCAATAGACGGCCCACTTTATTTAGTTTTCAAAACTTATGGAACCATGCTCGTGAA +TCCTGATTACAGAGCCGTTCAACATCTGTCCAGAGCATTTCCCACAGTCACAGGATTCAT +AACCCAGATGACATCATCCTTCTCGTCTGAGCTATACCTCAGATTCTCTAAAAGGGGAAA +ATTTTTCCGAGATGCAGAATACTTGACTTCTTCTACTATTAGGGAGATGAGCCTTGTATT +GTTCAACTGCAGCAGCCCAAAAAGTGAGATGCAGAGGGCCCGCTCTCTGAATTACCAGGA +CCTTGTAAGAGGATTTCCTGAGGAGATCATATCCAACCCATATAATGAGATGATCATAAC +TCTGATTGACAGTGATGTGGAATCTTTCCTGGTTCATAAGATGGTTGATGACCTAGAGTT +GCAGCGAGGAACTTTGTCTAAAGTCTCCATTATCGTAGCCATCATGATAGTCTTTTCCAA +CAGGGTGTTTAATGTCTCGAAACCGTTGACTGACCCTTTATTCAATCCGCCATCTGACCC +CAAAATCTTGAGACACTTCAATATTTGCTGCAGTACCATGATGTACTTGTCTACTGCTCT +GGGGGATGTTCCAAGCTTTGCTAGACTTCATGACCTGTATAACAGACCAATAACCTACTA +TTTCGGGAAGAAAGTTATCCGAGGGAACATTTATTTATCCTGGAGTTGGTCTGACGACAC +TTCAGTGTTCAAAAGGGTGGCTTGCAATTCTAGTTTGAGCCTCTCGGCTCACTGGATAAG +GCTGATTTACAAAATAGTGAAGACTACCAGACTTGTGGGAAATACGGAGAATCTATCCAG +GGAGGTCGAAAAGCACCTTCGAGGGTACAACAGGTGGATTACCCTAGACGACATAAAATC +CAGATCATCTCTTCTAGATTACAGCTGCCTATAAAACAGGGCATTTGGGGAGAAATACAT +GGACCAACACCTTGAGACAGTATGCCCTGAAAAAAACAAGACCCTGATTCATAACCTCTA +TTTGCTTGATGGTTTTTTTTGTCTTTGTTGTTTTTTTGTTAA---- diff --git a/test/input/TestSnpEff/ref-rabies-JQ685920.fasta 
b/test/input/TestSnpEff/ref-rabies-JQ685920.fasta new file mode 100644 index 000000000..872a61572 --- /dev/null +++ b/test/input/TestSnpEff/ref-rabies-JQ685920.fasta @@ -0,0 +1,200 @@ +>JQ685920 +ACGCTTAACAACAAAATCAGAGAAGAAGTAGACAGTGTCGTCTACAAAGCAAGAATGTAA +CACCCCTACAATGGATGCCGACAAGATTGTGTTTAAAGTCAATAATCAGGTGGTCTCTCT +GAAGCCTGAAATTATAATAGATCAATATGAATACAAGTACCCGGCTATCAAAGATTTGAA +AAAGCCCAGTATAACCTTAGGAAAAGCCCCTGACTTGAACAAAGCATACAAGTCAATTCT +GTCCGGCATGAATGCAGCCAAGCTTGACCCTGATGATGTATGCTCTTATCTAGCAGCCGC +AATGCAGTTCTTTGAGGGGACATGTCCTGATGACTGGACCAGCTATGGAATCCTGATTGC +ACGGAAGGGAGACAAGATTACTCCAAATTCTCTTGTGGACATAAAACGTACTAATGTGGA +AGGGAACTGGGCTTTGACAGGGGGTATGGAGTTGACGAGAGACCCCACAGTTTCGGAGCA +TGCATCCCTAGTTGGTCTTCTCTTGAGTCTTTACAGGTTAAGCAAAATATCTGGACAAAA +CACCGGCAATTACAAAACGAACATCGCAGACAGAATAGAGCAGATTTTCGAGACGGCCCC +CTTTGTAAAGATCGTAGAGCATCATACCTTGATGACAACCCACAAAATGTGTGCTAACTG +GAGTACTATACCGAACTTCAGATTTCTGGCCGGAACCTACGATATGTTTTTCTCGCGGGT +TGAACATCTGTATTCAGCAATTAGAGTGGGTACAGTTGTAACAGCCTATGAGGACTGCTC +AGGGTTGGTGTCGTTTACAGGGTTTATTAAGCAAATAAATCTCACTGCAAAAGAAGCAAT +ACTATATTTCTTCCACAAGAACTTCGAGGAAGAGATAAGAAGAATGTTCGAGCCGGGGCA +AGAGACGGCAGTTCCTCACTCCTATTTCATACATTTTCGTTCTTTGGGCCTGAGTGGGAA +ATCTCCGTATTCATCAAATGCAGTTGGTCACGTGTTCAACCTCATTCACTTTGTTGGATG +TTATATGGGTCAAGTGAGATCTTTGAACGCAACGGTTATTGCTACATGTGCCCCGCATGA +GATGTCTGTTCTTGGGGGTTATTTGGGGGAGGAGTTTTTCGGAAAAGGGACTTTTGAGAG +AAGATTCTTTAGGGATGAGAAAGAACTTCAGGAATATGAGGCAGCTGAAGCAACGAAGAC +TGAAATAGCCTTGGCGGATGACGGAACAGTCAATTCTGATGATGAGGACTACTTCTCTGG +TGAAACTAGGAGCCCGGAGGCAGTTTATACTCGAATCATGATGAATGGGGGTAGATTAAA +AAGATCACACATAAGGAGATATGTCTCAGTCAGTTCCAATCATCAAGCTCGCCCTAATTC +GTTCGCTGAGTTTCTAAGTAAGACATACTCTAGTGATTCATAAAGAATTGAACAACAGGA +TTGTAAACATTAACAAATTGTGTACATCCTTCACGAAAAAAACTAACACCCCTCCTCTTG +AACCATCTCAGACATGAGCAAGATTTTTGTTAACCCAAGTGCAATCAGGGCAGGCTTGGC +TGATCTGGAGATGGCAGAGGAAACTGTAGATCTAGTTGCCAAGAACATCGAAGATAATCA +AGCTCATCTCCAAGGAGAACCCATAGAGGTGGACAGTCTTCCTGAAGACATGAGACGGCT +TCAGTTAGACGATGAAAAACCATCTGGCCTCGGTGTGATTGCCAAAGCAGGGGAGAGCAA 
+ATGTCAGGAAGACTTTCAGATGGACGAGGGGGAGGACCCCGCCCTCTTGTTCCAGTCGTA +CCTAGACAATATTGGAGTTCAAATGGTCAGACAAATGAGATCAGGAGAGAGGTTCCTCAA +GATATGGTCTCAGACTGTTGAGGAGATCATATCCTATGTCACGGTCAATTTCCCCAGTCT +GCCAAGGAAAGCCTCAGAAGACAAGGCTACCCAGACTGCCAACCAGGAACTCAAGAAAAA +GACAATGTCTGTTTCTTCTCAGCGAGAAAGTAAATCATCTAAAGCTAAAATGGCGGCCCA +AACCGCCTCCGGTCCTCCTGCTCTAGAATGGTCCGCCACAAACGAGGAGGATGACCTATC +TGTGGAGGCTGAAATTGCTCACCAGATTGCTGAGAGCTTCTCTAAGAAGTACAAATTCCC +CTCTCGATCATCAGGGATATTCTTGTATAATTTTGAGCAGTTGAAGATGAACCTTGATGA +CATCGTTAAGGAGTCGAAAAATGTGCCAAGCGTAACCCGCTTAGCCCATGACGGATCCAA +ACTCCCTCTGAGGTGTGTGCTGGGGTGGGTTGCTCTAGCCAACTCCAAAAAGTTCCAGCT +GTTAGTTGAGCCTGACAAGCTAAACAAAATAATGCAAGACGACCTGAATCGTTATGTGTT +CTGCTGACCAAACCCTCAAACTCAGTCGTGCTATGCGATCAAATCCAGCCTGCTCCAAAC +CCAACGTGAAAAAAACAGGCAACACCACTGATAAAATGAACTTTCTACGCAAGATAGTAA +AGAACTGTAGAGATGAGGACACTCAGAAGCCCTCTTTCGTGTCGGCTCCTCCAGATGATG +ATGACTTGTGGTTACCCCCTCCGGAATATGTTCCATTGAAGGAACTCTCGGGTAAGAAAA +ACATGAGAAACTTTTGTATAAATGGAGAGGTCAAAGTGTGTAGTCCGAATGGCTATTCAT +TTAGGATCCTGAGGCACATTTTGAAATCATTCGATGAGATCTATTCTGGAAATCAAAGAA +TGATAGGGTTAGTTAAAGTTGTTGTTGGACTTGCGTTGTCAGGAGCCCCAGTCCCGGAGG +GCATGAACTGGGTATACAAATTGAGGAGAACTCTTATCTTCCAGTGGGCTGATTCTAGGG +GCCCTCTAGAGGGGGAGGAGTTAGAATACTCTCAGGAAATTACCTGGGATGACGATGCCG +AATTTGTCGGATTGGAAATACGAGTGAGTGCAAGACAGTGCCATATCCAGGGCAGGATTT +GGTGTATCAACATGAACTCTAGGGCATGTCAACTATGGTCTGACATGTCCCTTCAAACAC +AAAGGTCTGAGGAGGATAAAGACTCTTCAATGCTTCTGGAATAGTCAATTTACATCCTAC +AAATTCCTCAATTGTTTACCTCTGGAGGAGAGAGCACATGGACTTAACTCCAACCTTCAG +GAGCAATAGAACAAAAACATGTTATGGTGCCGTTGAATTGCTGCATTTTATCATAGTCAA +ATCAATTATCTTTACATTTTAAGCCTCTCGGATGTGAAAAAAACTATCAACATCCCTCAA +AAGACTTAAGGAAACATGATCCCTCAGGCTCTTCTGTTTGTGCCTTTTCTATTTCCCTCG +TTGTGTCTCGGGAAATTCCCCATCTACACCATACCGGAAAAGCTCGGCCCTTGGAGTCCC +ATCGACATACATCATCTCAGCTGTCCTAACAATTTGGTTGTGGAGGACGAGGGTTGCGAC +AGTCTGTCAGGGTTTTCTTACATGGAACTGAAGGTGGGTTACATCTCTGCCATAAAGGTG +AATGGGTTCACTTGTACCGGTGTCGTGACAGAAGCTGAGACCTACACCAACTTTGTTGGT +TATGTCACCACCACGTTCAAGAGAAAACACTTCCGCCCTATGCCAGATGCATGCAGAGCT 
+GCGTACAACTGGAAGACAGCCGGTGACCCTAGATATGAGGAGTCTCTTCACAATCCTTAT +CCTGATTACCATTGGCTACGGACCGTGAAAACCACCAAAGAATCTCTTGTTATCATATCG +CCGAGTGTGGCTGATTTGGACCCATATGACAAATCCCTTCATTCAAGAGTTTTCCCTGGT +GGGAAATGTTTGGGGATAACAATCTCTTCCACCTACTGCTCAACTAACCATGACTATACT +ATCTGGATGCCCGAAGAAGCAAGACTCGGGACATCTTGTGACATTTTTACCAACAGCAAA +GGGAAGAGGGCATCTAAGGGGGGTAGGACTTGCGGATTCGTGGATGAAAGGGGCTTATAT +AAGTCTCTAAAAGGGGCATGTAAACTTAAGCTGTGCGGAGTTCCTGGACTTAGACTTATG +GATGGAACGTGGGTCGCTATTCAGACACCAGGTGAGACCAAATGGTGCTCTCCTGATCAG +CTGGTAAATCTACATGACTTTCGTTCAGATGAGATAGAACATCTCGTCGTGGAGGAGTTG +ATCAAGAAGAGAGAAGAATGTCTAGATGCACTAGAGTCCATCATGACCACCAAATCAGTA +AGCTTCAGACGTCTCAGCAACTTGAGAAAACTTGTCCCTGGGTTTGGAAAGGCATACACT +ATATTCAACAAAACCTTGATGGAGGCTGATGCTCACTACAAGTCAGTTCGGACTTGGGAT +GAAATCATTCCCTCAAAGGGGTGCCTAAGAGTCGGAGGGAGGTGTCATCCTCATGTAAAC +GGAGTGTTTTTCAATGGTATAATTCTGGGTCCGGATGGACATGTCCTGATTCCAGAGATG +CAATCGTCCCTCCTCCAACAGCATATGGAGCTGTTGGAATCCTCTGTAATCCCCCTAATA +CATCCCTTGGCCGACCCATCAACAGTCTTCAAAGACGGTGATGAAGCGGAGGACTTTGTT +GAGGTTCACCTTCCGGACGTTCACAAGCAGGTCTCAGGGGTCGATCTTGGTCTCCCAAAC +TGGGGGAAATATGTGCTGATGAGTGCAGGTGTTCTAGCGACCGTGATACTGACAATCTTC +TTGTTAACATGTTGCAGAAGGGTTAACAGAACAAAACCAAAACAACAAAGTCTTGGGGAG +TCAGGAAGGAAAGTATCGGTTACTCCTCAAAATGGGAAGGTCATGTCTTCATGGGAGTAT +TACAAGAGTGAGGGCAGGACCAGTCTGTGAGTGCTGGCCATCTCCTCCATATCTTGCGTT +CAGAAGATCACCTCTCTTCTAGATCTGGGGGAATCTCTTGTTTTGACAGTCCTTTGTGGA +CTCCGTGCTACAAGGCAAAATTCGAGAGTCAAGAAACTTTCATTAATCATCCCAACTGAT +CAGACACAGTTACGTAGGTTCTGATAATGTATGACGTCTTCTGACAGTGTCAGTGACCAA +TGGTGCTCTCATCCCCCATGGACTGATACCAAAGGTTGTGGACAAACCAACCGATATCTC +AGATAATTCTAGGCTTGAGCCGGGGCAGGGACCGTGGCTAGTCCCCCTACACTAGACTAA +ATAATGGTTAGCTGAGGGAAGCGATTTGCCTCCTATGAAGGACATAAGCAATAGATCACA +ATCATCTTACATCCCGATAAGGTGTGCTTAACTACAAAGGGCTGGGCCATCCAAGCTTTT +CAGCCAAGAAAAAAACTGTGGAATGGAGGAGTAATTAACAACACTTCTCATCCTGAGAAC +TGCACCATGATGCTTGATCCAGGGGAGGTTTATGATGACCCCGTTGATCCGATTGAGTCA +GAGGCCGAGCCGAGAGGGAACCCAACCATTCCCAACATCTTAAGAAACTCTGACTACAAT +CTCAACTCTCCTCTAATAGAGGATCCAGCCAAACTAATGTTAGAATGGTTGAAGACAGGA 
+AATAGGCCTCTCCGGATAACTTTAACAGACAATTGCTCTAGGTCTTACAAAATTTTGAAG +GATTATTTCAAGAAAGTGGATATAGGATCTATCAAAGTGGGCGGGGCTGCAGCACAATCT +ATGATCTCCCTTTGGTTGCACGGTGCCCACTCTGAATCAAATAGGAGCCGGAAGTGTATA +ACCGACTTGGCTCAGTTCTATTCCAAGTCTTCCCCCATAGAAAAGCTGTTAAATTACACA +CTCGGAAATCGAGGGCTGAGGATCCCCCCAGAGGGGGTCCTAAGTTGCCTTGAGAGGGTC +GATTACGATAAAGCATTTGGGAGGTATCTGGTTAACATATACTCCTCTTACTTATTCTTT +CACGTGATCACCCTTTACATGAACGCCTTGGACTGGGATGAGGAGAAGACCATTCTAGCA +CTGTGGAGGGATTTAACCTCAATAGATATAGGAAAGGACTTGGTCAAGTTTAAAGATCAA +ATATGGGGATTGCTAATTGTGACCAAGGATTTTGTGTACTCACAAAGTTCTAACTGCCTT +TTTGATAGAAACTACACGCTTATGCTTAAAGACCTTTTTTTGTCTCGGTTCAACTCTCTA +ATGATTCTCCTTTCTCCTCCGGAACCCAGATATTCAGACGACCTGATATCCCAGCTGTGT +CAGCTATATATCGCTGGAGATCATGTCTTGTCTATGTGCGGGAACTCTGGTTATGAGGTC +ATTAAAATATTGGAGCCGTACGTTGTGAACAGTTTAGTCCAGAGGGCAGAAAAGTTTAGG +CCTCTCATTCATTCCCTAGGGGACTTCCCTGTATTTATAAGAGATAAGGTAGGTCAGCTT +GAAGGAACATTTGGTCCCAGTGCAAAAAGGTTCTTCAGGGTTCTGGATCAATTCGACAAT +ATACACGACTTAGTCTTTGTATACGGCTGTTATAGGCATTGGGGGCATCCTTACATAGAT +TATAGAAAGGGCTTATCGAAGCTATATGATCAAGTCCACATCAAGAAGGTGATAGATAAG +ACTTACCAGGAGTGTTTGGCCAGCGACCTGGCCAAAAGGATCCTCAGGTGGGGATTTGAC +AAGTATTCCAAATGGTATATTGATTCAAGACTCCTCTCAAAGGACCACCCCCTAACTCCT +TATATCAAAACCCAGACGTGGCCTCCAAAACATGTGGTAGATTTGGTGGGTGACACTTGG +CATAAGCTCCCGATCACCCAGATCTTCGAGATCCCCGAATCAATGGACCCATCTGAGATA +CTAGATGATAAATCACACTCTTTTACTAGAACTAGACTAGCGTCCTGGCTATCAGAGAAC +AGAGGAGGGCCGGTCCCCAGCGAGAAGGTCATTATCACTGCTCTTTCCAAGCCTCCTGTC +AACCCCAGGGAATTTCTGAAATCTATAGACCTGGGAGGATTGCCGGACGAGGATTTGATA +ATCGGCCTCAAGCCTAAGGAAAGAGAGTTGAAGATAGAAGGTCGGTTTTTTGCCTTGATG +TCCTGGAATCTAAGGCTGTATTTTGTCATCACAGAAAAGCTCCTAGCCAATTATATCTTG +CCACTTTTTGACGCACTGACTATGACAGACAACTTGAACAAAGTGTTTAAAAAGCTGATC +GACAGAGTCACCGGACAGGGGCTTTTAGACTACTCCAGAGTTACATACGCTTTTCACCTG +GACTATGAAAAGTGGAACAATCATCAGAGGCTGGAGTCGACAGAGGATGTATTTTCTGTA +CTTGATCAAGTATTCGGATTAAAGAGGGTGTTTTCCAGAACTCATGAGTTTTTTCAGAAG +TCTTGGATCTATTACTCAGATAGATCCGACCTCATCGGGTTATGGGAAGATCAAATATAC +TGTTTGGACATGTCAAACGGCCCGACATGCTGGAACGGCCAGGATGGCGGGCTAGAGGGT 
+TTGCGACAGAAAGGCTGGAGTCTGGTTAGCCTATTAATGATAGATCGTGAATCTCAAACC +AGGAACACAAGAACTAAAATACTAGCTCAGGGAGACAACCAAGTTCTGTGTCCGACATAT +ATGCTGTCGCCGGGGCTCTCTCGAGAGGGGCTTCTCTACGAGTTGGAGAGCATATCAAGA +AACGCTCTCTCGATATATCGTGCCATCGAGGAAGGGGCATCCAAACTGGGGCTCATCATA +AAGAAGGAAGAGACCATGTGTAGCTATGACTTTCTCATCTATGGGAAAACTCCTTTATTT +CGAGGTAACATCTTGGTGCCTGAGTCCAAAAGATGGGCTAGGGTCTCCTGTATCTCCAAC +GACCAGATAGTCAACCTTGCCAATATAATGTCAACTGTATCCACTAATGCATTGACTGTC +GCCCAACACTCTCAGTCTTTGATTAAACCGATGAGGGACTTCCTGCTTATGTCAGTACAA +GCTGTCTTCCATTACTTGCTGTTTAGCCCCATCTTGAAAGGCAGAGTTTACAAGATCCTG +GGTGCCGAGGGGGACAATTTTCTTCTAGCTATGTCTAGGATAATTTATTTAGACCCCTCA +TTGGGGGGAGTATCTGGAATGTCTCTTGGAAGGTTCCACATACGTCAGTTCTCAGATCCT +GTCTCAGAAGGGTTGTCCTTCTGGAGAGAGATATGGCTGAGCTCCAGTGAGTCTTGGGTT +CATGCACTGTGTCAAGAGGCAGGAAACCCAGATCTGGGAGAGAGAACACTGGAAAGCTTC +ACTCGCCTGCTCGAGGATCCTACTACCCTAAACATTAAAGGAGGGGCCAGTCCTACCATT +CTTCTCAAAGATGCGATCAGAAAAGCCTTGTATGATGAGGTAGACAAGGTGGAGAACTCA +GAGTTTAGGGAAGCGATCCTCTTGTCTAAGACTCATAGGGATAATTTCATACTCTTCTTG +AAATCTGTTGAGCCTTTGTTCCCCCGATTTCTCAGTGAGCTCTTCAGTTCATCATTCCTG +GGGATTCCCGAATCAATCATTGGGTTGATACAAAACTCCAGAACAATAAGAAGACAGTTT +AGGAGGAGTCTCTCAAGAACCTTGGAAGAGTCCTTTTATAACTCAGAGATCCATGGGATC +AACCGGATGACCCAGACTCCCCAGAGGGTCGGGAGAGTTTGGCCCTGCTCTTCTGAGAGG +GCAGACCTCCTAAGAGAAATCTCATGGGGAAGGAAGGTGGTCGGCACAACAGTTCCTCAC +CCCTCTGAGATGTTGGGGTTGATTCCCAAATCCTCTATTTCCTGTACTTGTGGAGTAACA +GGAGGGGGAAACCCTCGAATATCAGTGTCTGTGCTTCCATCTTTTGATCAGTCATTTTTT +TCGAGAGGCTCTCTGAAAGGATATCTGGGCTCATCCACTTCCATGTCGACCCAGCTATTC +CATGCCTGGGAGAAAGTCACCAATGTTCATGTGGTGAAAAGAGCTCTGTCACTCAAGGAA +TCTATAAACTGGTTCATTACAAGAGACTCAAATTTGGCCCAAACTTTGATTAGGAACATA +ATGTCTCTGACAGGCCCAGATTTTCCTCTGGAAGAGGCCCCTGTCTTCAAGAGGACAGGG +TCTGCCCTGCATAGGTTCAAGTCGGCTAGATACAGTGAAGGGGGGTACTCTTCAGTTTGT +CCAAATCTCCTCTCTCACATCTCTGTCAGTACAGACACGATGTCTGATTTGACTCATGAT +GGGATAAATTATGACTTTATGTTTCAGCCGTTGATGCTTTATGCGCAAACATGGACATCA +GAACTGGTGCAAAAAGATACACGGCTGAAAGATTCTACCTTTCACTGGCATCTTCGATGT +AACAAGTGCATAAGGCCCATCGATGATATCACCCTTGATACCTCTCAGATCTTCGAGTTC 
+CCAGATGTCTCAAGAAGGATATCTAGGATGGTTTCTGGAGCTGTGCCTCATTTCAGAAAG +CTTCCTGATATTCGTTTGAGACCAGGTGACTTCGAATCTCTAAGTGGTAAAGAGAAGTCT +CGCCACATAGGATCGGCCCAAGGACTCTTGTATTCAATCTTGGTTGCGATCCATGACTCT +GGGTATAATGATGGAACCATTTTCCCTGTCAACATATATAGCAAGGTCTCCCCTAGAGAC +TATTTGAGAGGGCTTGCGAGAGGAGTCTTGATAGGGTCCTCGATTTGCTTCTTGACAAGA +ATGACAAACATTAACATCAATAGACCTCTTGAACTGATCTCAGGAGTGATATCATACATC +CTCTTGAGACTGGACAATCACCCATCCTTGTATATAATGCTCAGAGAGCCATCCCTTAGA +GGAGAGATATTTTCTATTCCCCAAAAGGTCCCCGCTGCTTACCCGACCACGATGAAAGAA +GGTAACAGATCTGTTCTGTGCTACCTCCAACATGTGTTGCGCTACGAACGAGAGGTGATC +ACTGCATCTCCGGAGAATGACTGGTTGTGGATCTTCTCGGACTTTAGAAGCTCTAAGATG +ACATATCTGACTCTCATCACCTACCAGTCTCATCTGTTACTTCAGAAGGTTGAGAAGAAT +CTCTCCAAGAGTATGAGAGCCAACCTACGGCAAATGAGCTCTCTGATGAGGCAGGTGCTG +GGTGGCCATGGTGAAGACACCTTAGAGTCAGACGAAGACATCCAGAGGCTGTTAAGAGAC +TCTCTGCGTAGGACAAGGTGGGTAGACCAGGAAGTGCGCCATGCAGCGAGAACTATGACA +GGAGTTTACAGCCCCACCAAGAAGATGTCACGCAAAGCCGGGTGTTCAGAATGGGTCTGC +TCTGCACAGCAGGTTGCGGTTTCGACCTCATCGAACCCAGCCCCTGTTTCAGAGCTGGAC +GTCAGAGCCCTCTCCAAAAGGCTTCAAAACCCGTTGATCTCTGGACTGAGAGTGGTTCAG +TGGGCGACAGGGGCCCATTATAAGCTCAAACCTATTCTGGATGATCTCAATGTCTTTCCA +TCTCTATGTCTTGTGGTCGGGGATGGGTCAGGGGGGATATCAAGAGCAGTACTTAACATG +TTTCCTGATGCCAAACTCGTATTCAACAGCCTGTTAGAAGTAAATGACCTGATGGCATCA +GGAACACATCCGTTGCCCCCCTCAGCAATCATGAGTGGAGGAGATGACATCATATCCAGG +GTAATTGGCTTCGACTCCATCTGGGAGAAACCTTCTGACTTGAGGAACTTGACCACATGG +AGGTACTTCCAGTCAGTCCAAGAACAAGTAAATATGTCATACGACCTTATCATTTGTGAT +GCAGAGGTCACTGACATTGCATCAATCAACCGAATAACCCTGCTAATGTCTGATTTTGCA +TTGTCCATAGACGGCCCACTTTATCTGGTTTTCAAAACTTACGGTACCATGCTTGTAAAC +CCTGACTACAGAGCAATTCAACATCTATCCAGAGCATTTCCTGCGGTCACTGGATTCATA +ACTCAGATGACCTCGTCCTTCTCATCCGAGCTATATCTCAGATTCTCAAAGCGGGGGAAG +TTCTTCCGGGATGCAGAGTATTTGACTTCTTCCACCCTTCGAGAAATGAGCCTTGTATTG +TTCAACTGTAGCAGCCCCAAGAGTGAGATGCAGAGAGCCCGCTCTCTAAATTACCAAGAT +CTTGTAAGAGGATTCCCAGATGAGATCATATCCAATCCTTACAGCGAAATGATCATAACT +CTGATTGACAGTGATGTAGAATCTTTCCTGGTTCACAAAATGGTAGATGATCTAGAGTTG +CAGCGAGGAACTTTGTCTAAAGTTTCTATCATTATAGCCATCATGATAGTCTTCTCCAAT 
+AGGGTGTTCAATGTCTCAAAACCACTGACTGACCCTTTATTCTATCCGCCATCTGATCCC +AAGATCTTGAGACACTTCAACATATGCTGCGGCACCATGATGTACTTGTCTACCGCCTTA +GGGGATGTGCCCAGCTTCTCGAGACTTCATGACCTGTACAACAGACCCATAACTTATTAT +TTTAGGAAGCAAGTCATCCGGGGGAGTGTTTACCTGTCCTGGAGTTGGTCTGATGACACT +TCAGTGTTTAAAAGGGTGGCTTGCAACTCTAGCTTGAGTCTCTCATCTCACTGGATCAGG +TTGATTTACAAAATAGTGAAGACCACCAGACTTGTAGGAAGGGTCGAGGACCTGTCCAGG +GAGGTAGAGAGGCACCTTCGGGGGTACAACAGATGGATCACCCTCGATGACATTAGATCC +CGATCATCTCTGTTAGATTATAGCTGCTTGTAAGACTAAGCACTCTCGAGGGAATGTACA +GACTAAGATTATGGGACGGTGTAACCTGAAAAAAACAAGATCCCGATTCATAACTTCTGT +TTACTTGATTGTTTTTCCATCTTTATTGTTTTTTTGTTAAGCGT diff --git a/test/input/TestSnpEff/vphaser2.RBV16.mapped.txt.gz b/test/input/TestSnpEff/vphaser2.RBV16.mapped.txt.gz new file mode 100644 index 000000000..8e12d4e6f Binary files /dev/null and b/test/input/TestSnpEff/vphaser2.RBV16.mapped.txt.gz differ diff --git a/test/integration/test_intrahost.py b/test/integration/test_intrahost.py index ee913620c..83a8d2030 100644 --- a/test/integration/test_intrahost.py +++ b/test/integration/test_intrahost.py @@ -12,10 +12,18 @@ # module-specific import intrahost +import interhost +import tools.mafft import util.file +import util.vcf import test +from test import TestCaseWithTmp import tools +# third-party +import pytest +from mock import patch + class TestPerSample(test.TestCaseWithTmp): ''' This tests step 1 of the iSNV calling process (intrahost.vphaser_one_sample), which runs V-Phaser2 on @@ -86,3 +94,65 @@ def test_vphaser_one_sample_3libs_and_chi2(self): intrahost.vphaser_one_sample(inBam, refFasta, outTab, vphaserNumThreads=test._CPUS, minReadsEach=6, maxBias=3) expected = os.path.join(myInputDir, 'vphaser_one_sample_3libs_expected.txt') self.assertEqualContents(outTab, expected) + +class TestSnpEff(TestCaseWithTmp): + @pytest.fixture(autouse=True) + def capsys(self, capsys): + self.capsys = capsys + + def test_snpeff(self): + temp_dir = tempfile.gettempdir() + input_dir = util.file.get_test_input_path(self) + + ref_fasta = 
os.path.join(input_dir,"ref-rabies-JQ685920.fasta") + assembly_fasta = os.path.join(input_dir,"RBV16.fasta") + isnv_calls = os.path.join(input_dir,"vphaser2.RBV16.mapped.txt.gz") + + # align sample to reference to create MSA + msa_fasta = util.file.mkstempfname('.fasta') + expected_msa_fasta = os.path.join(input_dir,"msa.fasta") + args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"] + args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + test.assert_equal_contents(self, msa_fasta, expected_msa_fasta) + + # merge (one) VCF to merged vcf + merged_vcf = os.path.join(temp_dir,"merged.vcf.gz") + expected_merged_vcf = os.path.join(input_dir,"merged.vcf.gz") + args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta, "--strip_chr_version", "--parse_accession"] + args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + vcf = util.vcf.VcfReader(merged_vcf) + expected_vcf = util.vcf.VcfReader(expected_merged_vcf) + rows = list(vcf.get()) + expected_rows = list(expected_vcf.get()) + #self.assertEqual(rows, expected_rows) + + # run snpEff against merged VCF to predict SNP effects + eff_vcf = os.path.join(temp_dir,"ann_eff.vcf.gz") + expected_eff_vcf = os.path.join(input_dir,"ann_eff.vcf.gz") + args = [merged_vcf, "JQ685920", eff_vcf, "--emailAddress=test@example.com"] + with self.capsys.disabled(): + args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + vcf = util.vcf.VcfReader(eff_vcf) + expected_vcf = util.vcf.VcfReader(expected_eff_vcf) + rows = list(vcf.get()) + expected_rows = list(expected_vcf.get()) + #self.assertEqual(rows, expected_rows) + + # create tabular iSNV output + eff_txt = os.path.join(temp_dir,"ann_eff.txt.gz") + expected_eff_txt = os.path.join(input_dir,"ann_eff.txt.gz") + args = [eff_vcf, eff_txt] + args = 
intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args) + args.func_main(args) + for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),util.file.read_tabfile(expected_eff_txt)): + for colout, colexpected in zip(outrow, expectedrow): + # if it casts to float, perform approx comparison + try: + f1=float(colout) + f2=float(colexpected) + self.assertAlmostEqual(f1, f2) + except ValueError: + self.assertEqual(sorted(colout.split(",")), sorted(colexpected.split(","))) diff --git a/tools/snpeff.py b/tools/snpeff.py index 993c7c808..409551df3 100644 --- a/tools/snpeff.py +++ b/tools/snpeff.py @@ -22,9 +22,9 @@ _log = logging.getLogger(__name__) TOOL_NAME = 'snpeff' -TOOL_VERSION = '4.1l' +TOOL_VERSION = '4.3.1t' -URL = 'http://downloads.sourceforge.net/project/snpeff/snpEff_v4_1l_core.zip' +URL = 'http://downloads.sourceforge.net/project/snpeff/snpEff_v4_3t_core.zip' class SnpEff(tools.Tool): @@ -43,7 +43,7 @@ def __init__(self, install_methods=None, extra_genomes=None): def version(self): return "4.1" - def execute(self, command, args, JVMmemory=None, stdin=None, stdout=None): # pylint: disable=W0221 + def execute(self, command, args, JVMmemory=None, stdin=None, stdout=None, stderr=None): # pylint: disable=W0221 if not JVMmemory: JVMmemory = self.jvmMemDefault @@ -59,7 +59,7 @@ def execute(self, command, args, JVMmemory=None, stdin=None, stdout=None): # ] + args _log.debug(' '.join(tool_cmd)) - return util.misc.run_and_print(tool_cmd, stdin=stdin, buffered=True, silent=("databases" in command), check=True) + return util.misc.run_and_print(tool_cmd, stdin=stdin, stderr=stderr, buffered=True, silent=("databases" in command), check=True) def has_genome(self, genome): if not self.known_dbs: @@ -76,7 +76,7 @@ def download_db(self, dbname, verbose=False): self.known_dbs.add(dbname) self.installed_dbs.add(dbname) - def create_db(self, accessions, emailAddress, JVMmemory): + def create_db(self, accessions, emailAddress=None, 
JVMmemory=None): sortedAccessionString = ", ".join([util.genbank.parse_accession_str(acc) for acc in sorted(accessions)]) databaseId = hashlib.sha256(sortedAccessionString.encode('utf-8')).hexdigest()[:55] @@ -109,27 +109,30 @@ def create_db(self, accessions, emailAddress, JVMmemory): self.execute('build', args, JVMmemory=JVMmemory) def available_databases(self): - command_ps = self.execute("databases", args=[]) - - split_points = [] - keys = ['Genome', 'Organism', 'Status', 'Bundle', 'Database'] - self.installed_dbs = set() - self.known_dbs = set() - for line in command_ps.stdout.decode("utf-8").splitlines(): - line = line.strip() - if not split_points: - if not line.startswith('Genome'): - raise Exception() - split_points = list(line.index(key) for key in keys) - elif not line.startswith('----'): - indexes = split_points + [len(line)] - row = dict((keys[i], line[indexes[i]:indexes[i + 1]].strip()) for i in range(len(split_points))) - self.known_dbs.add(row['Genome']) - if row.get('Status') == 'OK': - self.installed_dbs.add(row['Genome']) - yield row - - def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress, JVMmemory=None): + # do not capture stderr, since snpEff writes 'Picked up _JAVA_OPTIONS' + # which is not helpful for reading the stdout of the databases command + with open(os.devnull, "wb") as devnull: + command_ps = self.execute("databases", args=[], stderr=devnull) + + split_points = [] + keys = ['Genome', 'Organism', 'Status', 'Bundle', 'Database'] + self.installed_dbs = set() + self.known_dbs = set() + for line in command_ps.stdout.decode("utf-8").splitlines(): + line = line.strip() + if not split_points: + if not line.startswith('Genome'): + raise Exception("unexpected first line in snpEff 'databases' output (expected 'Genome' header): %r" % line) + split_points = list(line.index(key) for key in keys) + elif not line.startswith('----'): + indexes = split_points + [len(line)] + row = dict((keys[i], line[indexes[i]:indexes[i + 1]].strip()) for i in range(len(split_points))) + self.known_dbs.add(row['Genome']) + if row.get('Status') == 
'OK': + self.installed_dbs.add(row['Genome']) + yield row + + def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress=None, JVMmemory=None): """ Annotate variants in VCF file with translation consequences using snpEff. """ diff --git a/util/file.py b/util/file.py index df38b9fb2..66f87e728 100644 --- a/util/file.py +++ b/util/file.py @@ -371,7 +371,7 @@ def read_tabfile_dict(inFile): # truncate the row to the header length, and only include extra items if they are not spaces # (takes care of the case where the user may enter an extra space at the end of a row) row = row[:len(header)] + [item for item in row[len(header):] if len(item)] - assert len(header) == len(row) + assert len(header) == len(row), "%s != %s" % (len(header), len(row)) yield dict((k, v) for k, v in zip(header, row) if v) diff --git a/util/misc.py b/util/misc.py index 0716879af..fdc762cec 100644 --- a/util/misc.py +++ b/util/misc.py @@ -220,7 +220,7 @@ def run(args, stdin=None, stdout=None, stderr=None, shell=False, os.remove(stderr_fn) -def run_and_print(args, stdout=None, stderr=None, +def run_and_print(args, stdout=None, stderr=subprocess.STDOUT, stdin=None, shell=False, env=None, cwd=None, timeout=None, silent=False, buffered=False, check=False, loglevel=None): @@ -238,7 +238,7 @@ def run_and_print(args, stdout=None, stderr=None, args, stdin=stdin, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, + stderr=stderr, env=env, cwd=cwd, timeout=timeout, @@ -268,7 +268,7 @@ def run_and_print(args, stdout=None, stderr=None, raise(e) else: result = run(args, stdin=stdin, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, env=env, cwd=cwd, + stderr=stderr, env=env, cwd=cwd, timeout=timeout, check=check) if not silent and not loglevel: print(result.stdout.decode('utf-8')) @@ -281,7 +281,7 @@ def run_and_print(args, stdout=None, stderr=None, 'CompletedProcess', ['args', 'returncode', 'stdout', 'stderr']) process = subprocess.Popen(args, stdin=stdin, stdout=subprocess.PIPE, - 
stderr=subprocess.STDOUT, env=env, + stderr=stderr, env=env, cwd=cwd) output = [] while process.poll() is None: