From 6e8983a9090baa6f9841f598d8dbfba32d8f5336 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 27 Sep 2023 11:45:19 -0700 Subject: [PATCH] improve reverse decision logic --- intact/intact.py | 13 +- tests/data-edgy.fasta | 4 +- tests/expected-results-edgy/errors.json | 75 ++++++++- tests/expected-results-edgy/holistic.json | 14 +- tests/expected-results-edgy/nonintact.fasta | 165 +++++++++++++++++++- tests/expected-results-edgy/orfs.json | 158 ++++++++++++++++++- 6 files changed, 416 insertions(+), 13 deletions(-) diff --git a/intact/intact.py b/intact/intact.py index 127200c..95af45c 100644 --- a/intact/intact.py +++ b/intact/intact.py @@ -770,6 +770,11 @@ def analyse_single_sequence(writer, sequence, blast_rows): else: reference_name = sorted(subtype_choices.keys())[0] + blast_orientation_statistics = { "plus": 0, "minus": 0 } + for blast_row in blast_rows: + if blast_row.qseqid == reference_name: + blast_orientation_statistics[blast_row.sstrand] += abs(blast_row.qend - blast_row.qstart) + holistic.inferred_subtype = reference_name reference = subtype_choices[reference_name] aligned_subtype = AlignedSequence(this=reference, reference=st.HXB2()) @@ -777,13 +782,11 @@ def analyse_single_sequence(writer, sequence, blast_rows): forward_aligned_sequence = AlignedSequence(this=sequence, reference=aligned_subtype.this) reverse_aligned_sequence = forward_aligned_sequence.reverse() - forward_score = forward_aligned_sequence.alignment_score() - reverse_score = reverse_aligned_sequence.alignment_score() - if forward_score >= reverse_score: + if blast_orientation_statistics["minus"] < blast_orientation_statistics["plus"] \ + or reverse_aligned_sequence.alignment_score() <= forward_aligned_sequence.alignment_score(): aligned_sequence = forward_aligned_sequence else: - log.info("Reversing sequence " + sequence.id + "; forward score " - + str(forward_score) + "; reverse score " + str(reverse_score)) + log.info("Reversing sequence " + sequence.id) aligned_sequence = reverse_aligned_sequence sequence = aligned_sequence.this diff --git a/tests/data-edgy.fasta b/tests/data-edgy.fasta index 158160b..7fdf7c8 100644 --- a/tests/data-edgy.fasta +++ b/tests/data-edgy.fasta @@ -7,8 +7,10 @@ A >empty-sequence >empty-sequence2 ->Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED +>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED TGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCACGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATCCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTATTGGCCTCTTCTATCTTATCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGACCCCTGGCCCTGGTGTGTAGTTCTGCCAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCTTTGGGAGTGAATTAGCCCTTCCAGTCCCCCCTTTTCTTTTAAAAAGTGGCTAAGATCTACAGCTGCCTTGTAAGTCATTGGTCTTAAAGGTACCTGAGGTGTGACTGGAAAACCCACCTCCTCCTCCTCTTGTGCTTCTAGCCAGGCACAAGCAGCATTGGTAGCTGCTGTATTGCTACTTGTGATTGCTCCATGTTTTTCCAGGTCTCGAGATGCTGCTCCCACCCTATCTGCTGCTGGCTCAGCTCGTCTCATTCTTTCCCTTACAGTAGGCCATCCAATCACACTACTTTTTGACCACTTGCCACCCATCTTATAGCAAAATCCTTTCCAAGCCCTGTCTTATTCTTCTAGGTATGTGGCGAATAGCTCTACAAGCTCCTTGTACTACTTCTATAACCCTATCTGTCCCCTCAGCTACTGCTATGGCTGTGGCATTGAGCAAGCTAACAGCACTATTCTTTAGTTCCTGACTCCAATACTGTAGGAGATTCCACCAATATTTGAGGGCTTCCCACCCCCTGCGTCCCAGAAGTTCCACAATCCTCGTTACAATCAAGAGTAAGTCTCTCAAGCGGTGGTAGCTGAAGAGGCACAGGCTCCGCAGATCGTCCCAGATAAGTGCCAAGGATCCGTTCACTAATCGAATGGATCTGTCTCTGTCTCTCTCTCCACCTTCTTCTTCGGTTCCTTCGGGCCTGTCGGGTCCCCTCGGGGTTGGGAGGTGGGTCTGAAACGATAATGGTGAATATCCCTGCCTAACTCTATTCACTATAGAAAGTACAGCAAAAACTATTCTTAAACCTACCAAGCCTCCTACTATCATTATGAATAATTTTATATACCACAGCCAATTTGTTATGTTAAACCAATTCCACAAACTTGCCCATTTATCTAATTCCAATAATTCTTGTTCATTCTTTTCTTGCTGGTTTTGCGATTCTTCAATTAAGGAGTGTATTAAGCTTGTGTAATTGTTAATTTCTCTGTCCCACTCCATCCAGGTCGTGTGATTCCAAATCTGTTCCAGAGATTTATTACTCCAACTAGCATTCCAAGGCACAGCAGTGGTGCAAATGAGTTTTCCAGAGCAACCCCAAATCCCCAGGAGCTGTTGATCCTTTAGGTATCTTTCCACAGCCAGGATTCTTGCCTGGAGCTGCTTGATGCCCCAGACTGTGAGTTGCAACAGATGCTGTTGCGCCTCAATAGCCCTCAGCAAATTGTTCTGCTGCTGCACTATACCAGACAATAATTGTCTGGCCTGTACCGTCAGCGTCATTGAGGCTGCGCCCATAGTGCTTCCTGCTGCTCCCAAGAACCCAAGGAACAAAGCTCCTATTCCCACTGCTCTTTTTTCTCTCTGCACCACTCTTCTCTTTGCCTTGGTGGGTGCTACTCCTAATGGTTCAATTTTTACTACTTTATATTTATATAATTCACTTCTCCAATTGTCCCTCATATCTCCTCCTCCAGGTCTGAAGATCTCGGACTCATTGTTGCTATTACCACCATCTCTTGTTAATAGCAGCCCTGTAATATTTGATGAACATCTAATTTGTCCACTGATGGGAGGGGCATACATTGCTTTTCCTACTTTCTGCCACATGTTTATAATTTGTTTTATTCTGCATGGGAGGGTGATTGTGTCACTTCCTTCAGTGTTATTTGACCCTTCAGTACTCCAAGTACTATTAAACCAAGTACTATTAAACAGTTGTGTTGAATTACAGTAGAAAAATTCCCCTCCACAATTAAAACTGTGCGTTACAATTTCTGGGTCCCCTCCTGAGGATTGCTTAAAGATTATTGTTTTATTATTTCCAAATTGTTCTCTTAATTTGCTAGCTATCTGTTTTAAAGTGTTATTCCATTTTGCTCTACTAATGTTACAATGTGCTTGTCTCATATTTCCTATTTTTCCTATTGTAACAAATGCTCTCCCTGGTCCTCTCTGGATACGGATTCTTTTTCTTGTATTGTTGTTGGGTCTTGTACAATTAATTTCTACAGATGTGTTCAGCTGTACTATTATGGTTTTAGCATTGTCCGTGAAATTGACAGATCTAATTACTACCTCTTCTTCTGCTAGACTGCCATTTAACAGCAGTTGAGTTGATACTACTGGCCTAATTCCATGTGTACATTGTACTGTGCTGACATTTGTACATGGTCCTGTTCCATTGAACGTCTTATTATTACATTTTAGAATCGCAAAACCAGCCGGGGCACAATAATGTATGGGAATTGGCTCAAAGGATACCTTTGGACAGGCCTGTGTAATGACTGAGGTGTTACAACTTGTCAACTTATAGCTGGTAGTATCATTATCTATTGGTATTATATCAAGTTTATAAAAAAATGCATATTCTTTCTGCACCTTACCTCTTATGCTTGTGCTGATATTGAAAGAGCAGTTTTTTATCTCTCCTTTCTCCATTATCATTCTCCCGCTACTACTATTGGTATTAGTATCATTCTTCAAATCAGTGCACTTTAAACTAACACAGAGTGGGGTTAATTTTACACATGGCTTTAGGCTTTGATCCCATAAACTGATTATATCCTCATGCATCTGTTCTACCATGTCATTTTTCCACATGTTAAAATTTTCTGTCACATTTACCAATACTACTTCTTGTGGGTTGGGGTCTGTGGGTACACAGGCATGTGTGGCCCAAACATTATGTACCTCTGTATCATATGCTTTAGCATCTGATGCACAAAATAGAGTGGTGGTTGCTTCCTTCCACACAGGTACCCCATAATAGACTGTGACCCACAATTTTTCTGTAGCACTACAGATCATCAACATCCCAAGGAGCATGGTGCCCCATCTCCACCCCCATCTCCACAAGTGCTGATATTTCTCCTTCACTCTCATTGCCACTGTCTTCTGCTCTTTCTATTAGTCTATCAATTAACCTGTCTATTTTTCTTTGTCTTAATATTTTCCTATATTCTATGATTACTATGGACCACACAACTATTGCTATTATTATTGCTACTACTAATGCTACTATTGCTACTATTGGTATAGGTTGCATTACATGTACTACTTACTGCTTTGATAGAGAAGCTTGATGAGTCTGACTGTTCTGATGAGCTCTTCGTCGCTGTCTCCGCTTCTTCCTGCCATAGGAGATGCCTAAGGCTTTTGTTATGAAACAAACTTGGCAATGAAAGCAACACTTTTTACAATAGCAATTGGTACAAGCAGTTTTAGGCTGACTTCCTGGATGCTTCCAGGGCTCTAGTCTAGGATCTACTGGCTCCATTTCTTGCTCTCCTCTGTCGAGTAACGCCTATTCTGCTATGTCGACACCCAATTCTGAAATGGATAAACAGCAGTTGTTGCAGAATTCTTATTATGGCTTCCACTCCTGCCCAAGTATCCCCATAAGTTTCATAGATATGTTGCCCTAAGCCATGGAGCCAAATCCTAGGAAAATGTCTAACAGCTTCATTCTTAAGCTCCTCTAAAAGCTCTAGTGTCCATTCATTGTGTGGCTCCCTCTGTGGCCCTTGGTCTTCTGGGGCTTGTTCCATCTATCCTCTGTCAGTTTCGTAACACTAGGCAAAGGTGGCTTTATCTTTTTTGGTGTTATTAATGCTGCTAGTGCCAAGTATTGTAGAGATCCTACCTTGTTATGTCCTGCTTGATATTCACACCTAGGGCTAACTATGTGTCCTAATAAGGCCTTTCTTATAGCAGAGTCTGAAAAACAGTCAAAGTAATACAGATGAATTAGTTGGTCTGCTAGTTCAGGGTCTACTTGTGTGCTATATCTCTTTTTCCTCCATTCTATGGAGACTCCCTGACCCAAATGCCAGTCTCTTTCTCCTGTATGCAGACCCCAATATGTTGTTATTACCAATCTAGCATCCCCTAGTGGGATGTGTACTTCTGAACTTATTCTTGGATGAGGGCTTTCATAGTGATGTCTATAAAACCATCCCCTAGCTTTCCCTGAAACATACATATGGTGTTTTACTAAACTTTTCCATGTTCTAATCCTCATCCTGTCTACTTGCCACACAATCATCACCTGCCATCTGTTTTCCATAATCCCTAATGATCTTTGCTTTTCTTCTTGGCACTACTTTTATGTCACTATTATCTTGTATTACTACTGCCCCTTCACCTTTCCAGAGGAGCTTTGCTGGTCCTTTCCAAAGTGGATTTCTGCTGTCCCTGTAATAAACCCGAAAATTTTGAATTTTTGTAATTTGTTTTTGTAATTCTTTAGTTTGTATGTCTGTTGCTATTATGTCTACTATTCTTTCCCCTGCACTGTACCCCCCAATCCCCCCTTTTCTTTTAAAATTGTGGATGAATACTGCCATTTGTACTGCTGTCTTAAGATGTTCAGCCTGATCTCTTACCTGTCCTATAATTTTCTTTAATTCTTTATTCATAGATTCTACTACTCCTTGACTTTGGGGATTGTAGGGAATTCCAAATTCCTGCTTGATTCCCGCCCACCAACAGGCGGCCCTAACCGTAGCACCGGTGAAATTGCTGCCATTGTCAGTATGTATTGTTTTTACTGGCCATCTTCCTGCTAATTTTAAAAGAAAATATGCTGTTTCCTGCCCTGTTTCTGCTGGAATAACTTCTGCTTCTATATATCCACTGGCTACATGAACTGCTACCAGGATAACTTTTCCTTCTAAATGTGTACAATCTAGTTGCCATATTCCTGGACTACAGTCTACTTGTCCATGCATGGCTTCTCCTTTTAGCTGACATTTATCACAGCTGGCTACTATTTCTTTTGCTACTACAGGTGGCAGGTTAAAATCACTAGCCATTGCTCTCCAATTACTGTGATATTTCTCATGTTCATCTTGGGCCTTATCTATTCCATCTAAAAATAGTACTTTCCTGATTCCAGCACTGACTAATTTATCTACTTGTTCATTTCCTCCAATTCCTTTGTGTGCTGGTACCCATGCCAGATAGACCTTTTCCTTTTTTATTAACTGCTCTATTATTTGATTGACTAACTCTGATTCACTTTGATCTGGTTGTGCTTGAATGATTCCTAATGCATATTGTGAGTCTGTTACTATGTTTACTTCTAATCCCGAATCCTGCAAAGCTAGATAAATTGCTTGTAACTCAGTCTTCTGATTTGTTGTGTCAGTTAGGGTGACAACTTTTTGTCTTCCTCTATTAGTAACATATCCTGCTTTTCCTAATTTAGTCTCCCTGTTAGCTGCCCCATCTACATAGAAGGTTTCTGCTCCTACTATGGGTTCTTTCTCTAACTGGTACCATAATTTCACTAAGGGAGGGGTATTAACAAACTCCCACTCAGGAATCCAGGTGGCTTGCCAATACTCTGTCCACCATGTTTCCCATGTTTCCTTTTGTATGGGCAGTTTAAATTTAGGAGTCTTTCCCCATATTACTATGCTTTCTGTGGTTATTTTTTGCACTGCCTCTGTTAATTGTTTTACATCATTAGTGTGGGCACCCCTCATTCTTGCATATTTTCCTGTTTTCAGATTTTTAAATGGCTCTTGATAAATTTGATATGTCCATTGGCCTTGCCCCTGCTTCTGTATTTCTGCTATTAAGTCTTTTGATGGGTCATAATACACTCCATGTACTGGTTCTTTTAGAATCTCTCTGTTTTCTGCCAGTTCTAGCTCTGCTTCTTCTGTTAGTGGTATTACTTCTGTTAGTGCTTTGGTTCCTCTAAGGAGTTTACATAATTGCCTTACTTTAATCCCTGGGTAAATCTGACTTGCCCAATTCAATTTCCCCACTAACTTCTGTATGTCATTGACAGTCCAGCTGTCTTTTTCTGGCAGCACTATAGGCTGTACTGTCCATTTATCAGGATGGAGTTCATAACCCATCCAAAGGAATGGAGGTTCTTTCTGATGTTTTTTGTCTGGTGTGGTAAGTCCCCACCTCAACAGATGTTGTCTCAGCTCCTCTATTTTTGTTCTATGCTGCCCTATTTCTAAGTCAGATCCTACATACAAATCATCCATGTATTGATAGATAACTATGTCTGGATTTTGTTTTCTAAAAGGCTCTAAGATTTTTGTCATGCTACTTTGGAATATTGCTGGTGATCCTTTCCATCCCTGTGGAAGCACATTGTACTGATATCTAATCCCTGGTGTCTCATTGTTTATACTAGGTATGGTAAATGCAGTATACTTCCTGAAGTCTTCATCTAAGGGAACTGAAAAATATGCATCACCCACATCCAGTACTGTTACTGATTTTTTCTTTTTTAACCCTGCGGGATGTGGTATTCCTAATTGAACTTCCCAGAAGTCTTGAGTTCTCTTATTAAGTTCTCTGAAATCTACTAATTTTCTCCATTTAGTACTGTCTTTTTTCTTTATGGCAAATACTGGAGTATTGTATGGATTTTCAGGCCCAATTTTTGAAATTTTCCCTTCCTTTTCCATCTCTGTACAAATTTCTACTAATGCTTTTATTTTTTCTTCTGTCAATGGCCATTGTTTAACTTTTGGGCCATCCATTCCTGGCTTTAATTTTACTGGTACAGTCTCAATAGGGCTAATGGGAAAATTTAAAGTGCAACCAATCTGAGTCAACAGATTTCTTCCAATTATGTTGACAGGTGTAGGTCCTACTAATACTGTACCTATAGCTTTATGTCCACAGATTTCTATGAGTATCTGATCATACTGTCTTACTTTGATAAAACCTCCAATTCCCCCTATCATTTTTGGTTTCCATCTTCCTGGCAAACTCATTTCTTCTAATACTGTATCATCTGCTCCTGTATCTAATAGAGCTTCCTTTAGTTGCCCCCCTATCTTTATTGTGACGAGGGGTCGTTGCCAAAGAGTGACCTGAGGGAAGTTAAAGGATACAGTTCCTTGTCTATCGGCTCCTGCTTCTGAGGGGGAGTTGTTGTCTCTACCCCAGACCTGAAGCTCTCTTCTGGTGGGGCTGTTGGCTCTGGTCTGCTCTGAAGAAAATTCCCTGGCCTTCCCTTGTAGGAAGGCCAGATCTTCCCTAAAAAATTAGCCTGTCTCTCAGTACAATCTTTCATTTGGTGTCCTTCCTTTCCACATTTCCAACAGCCCTTTTTCCTAGGGGCCCTGCAATTTCTGGCTGTGTGCCCTTCTTTGCCACAATTGAAACACTTAACAATCTTTCTTTGGTTCCTAAAATTGCCTCTCTGCATCATTATGGTAGCTGAATTTGTTACTTGGCTCATTGCTTCAGCCAAAACTCTTGCCTTATGGCCGGGTCCTCCTACTCCCTGACATGCTGTCATCATTTCTTCTAGTGTAGCCGCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTCGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACCTCCTGTGAAGCTTGCTCGGCTCTTAGAGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTCTTATGTCCAGAATGCTGGTAGGGCTATACATTCTTACTATTTTATTTAATCCCAGGATTATCCATCTTTTATAAATTTCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATGTCACTTCCCCTTGGTTCTCTCATCTGGCCTGGTGCAATAGGCCCTGCATGCACTGGATGCACTCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAACATTTGCATGGCTGCTTGATGTCCCCCCACTGTGTTTAGCATGGTGTTTAAATCTTGTGGGGTGGCTCCTTCTGATAATGCTGAAAACATGGGTATCACTTCTGGGCTGAAAGCCTTCTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGATATGGCCTGATGTACCATTTGCCCCTGGATGTTCTGCACTATAGGGTAATTTTGGCTGACCTGATTGCTGTGTCCTGTGTCAGCTGCTGCTTGCTGTGCTTTTTTCTTACTTTTGTTTTGCTCTTCCTCTATCTTGTCTAAAGCTTCCTTGGTGTCTTTTATCTCTATCCTTTGATGCACACAATAGAGGGTTGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTATATGTTTTAATTTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTTTCCCATCGATCTAATTCTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAATTTTTGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCGAGTCCTGCGTCGAGAGAGCTCCTCTGGTTTCCCTTTCGCTTTCAGGTCCCTGTTCGGGCGCCACTGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCATGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATTCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTGTTGGCTTCTTCTAACTTCTCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGATCCCTGGCCCTGGTGTGTAGTTCTGCTAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCGTTGGGAGTGAATTAGCCCTTCCA +>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED +ACGATCTCTAAAAGGTGTGACTGATTTTCCCAGACTCCCTAGAGATCAATGGTCTCAGTGTGTTGTCTGCCCGTGTGTGATGAACTTCGTGAGTTCCGTTCGAAATAACTCCGAATTCGTCACCCAAGGGATCAATCGGTCTCTCGAGGGTCCGAGTCTAGACCAGATTGGTCTCTCTGGGTCATGTCCGTTTTTCGTCGACGAATATACGTCCTAGACTCCCGAGCGGTGAGGGGTCAGGGCGGGTCCGGTGCGGAGGGACCTTTCAGGGGTCGCCTTTCAGGGAACATCGTTCGAGCTACAGTCGTCAAGAACTTCATGAGGCCTACGTCGAGAGCCCGGTGCACTACTTTACGATCCGCCGACAGTTTGGAGGTGAGATTGTGAAGAGAGAGGCCCAGTAGGTAGGGTACGTCCGAGTGTCCCACATTGTTCGACCACAAGAGAGGAAATAACCGGAGAAGATAGAATAGACCGAGTTGACCATGATCGAACATCGTGGTAGGTTTCCAGTCACCTATAGACTGGGGACCGGGACCACACATCAAGACGGTTAGTCCCTTCATCGGAACACACACCATCTAGGTGTCTAGTTCCTATAGAACAGAAGAAACCCTCACTTAATCGGGAAGGTCAGGGGGGAAAAGAAAATTTTTCACCGATTCTAGATGTCGACGGAACATTCAGTAACCAGAATTTCCATGGACTCCACACTGACCTTTTGGGTGGAGGAGGAGGAGAACACGAAGATCGGTCCGTGTTCGTCGTAACCATCGACGACATAACGATGAACACTAACGAGGTACAAAAAGGTCCAGAGCTCTACGACGAGGGTGGGATAGACGACGACCGAGTCGAGCAGAGTAAGAAAGGGAATGTCATCCGGTAGGTTAGTGTGATGAAAAACTGGTGAACGGTGGGTAGAATATCGTTTTAGGAAAGGTTCGGGACAGAATAAGAAGATCCATACACCGCTTATCGAGATGTTCGAGGAACATGATGAAGATATTGGGATAGACAGGGGAGTCGATGACGATACCGACACCGTAACTCGTTCGATTGTCGTGATAAGAAATCAAGGACTGAGGTTATGACATCCTCTAAGGTGGTTATAAACTCCCGAAGGGTGGGGGACGCAGGGTCTTCAAGGTGTTAGGAGCAATGTTAGTTCTCATTCAGAGAGTTCGCCACCATCGACTTCTCCGTGTCCGAGGCGTCTAGCAGGGTCTATTCACGGTTCCTAGGCAAGTGATTAGCTTACCTAGACAGAGACAGAGAGAGAGGTGGAAGAAGAAGCCAAGGAAGCCCGGACAGCCCAGGGGAGCCCCAACCCTCCACCCAGACTTTGCTATTACCACTTATAGGGACGGATTGAGATAAGTGATATCTTTCATGTCGTTTTTGATAAGAATTTGGATGGTTCGGAGGATGATAGTAATACTTATTAAAATATATGGTGTCGGTTAAACAATACAATTTGGTTAAGGTGTTTGAACGGGTAAATAGATTAAGGTTATTAAGAACAAGTAAGAAAAGAACGACCAAAACGCTAAGAAGTTAATTCCTCACATAATTCGAACACATTAACAATTAAAGAGACAGGGTGAGGTAGGTCCAGCACACTAAGGTTTAGACAAGGTCTCTAAATAATGAGGTTGATCGTAAGGTTCCGTGTCGTCACCACGTTTACTCAAAAGGTCTCGTTGGGGTTTAGGGGTCCTCGACAACTAGGAAATCCATAGAAAGGTGTCGGTCCTAAGAACGGACCTCGACGAACTACGGGGTCTGACACTCAACGTTGTCTACGACAACGCGGAGTTATCGGGAGTCGTTTAACAAGACGACGACGTGATATGGTCTGTTATTAACAGACCGGACATGGCAGTCGCAGTAACTCCGACGCGGGTATCACGAAGGACGACGAGGGTTCTTGGGTTCCTTGTTTCGAGGATAAGGGTGACGAGAAAAAAGAGAGACGTGGTGAGAAGAGAAACGGAACCACCCACGATGAGGATTACCAAGTTAAAAATGATGAAATATAAATATATTAAGTGAAGAGGTTAACAGGGAGTATAGAGGAGGAGGTCCAGACTTCTAGAGCCTGAGTAACAACGATAATGGTGGTAGAGAACAATTATCGTCGGGACATTATAAACTACTTGTAGATTAAACAGGTGACTACCCTCCCCGTATGTAACGAAAAGGATGAAAGACGGTGTACAAATATTAAACAAAATAAGACGTACCCTCCCACTAACACAGTGAAGGAAGTCACAATAAACTGGGAAGTCATGAGGTTCATGATAATTTGGTTCATGATAATTTGTCAACACAACTTAATGTCATCTTTTTAAGGGGAGGTGTTAATTTTGACACGCAATGTTAAAGACCCAGGGGAGGACTCCTAACGAATTTCTAATAACAAAATAATAAAGGTTTAACAAGAGAATTAAACGATCGATAGACAAAATTTCACAATAAGGTAAAACGAGATGATTACAATGTTACACGAACAGAGTATAAAGGATAAAAAGGATAACATTGTTTACGAGAGGGACCAGGAGAGACCTATGCCTAAGAAAAAGAACATAACAACAACCCAGAACATGTTAATTAAAGATGTCTACACAAGTCGACATGATAATACCAAAATCGTAACAGGCACTTTAACTGTCTAGATTAATGATGGAGAAGAAGACGATCTGACGGTAAATTGTCGTCAACTCAACTATGATGACCGGATTAAGGTACACATGTAACATGACACGACTGTAAACATGTACCAGGACAAGGTAACTTGCAGAATAATAATGTAAAATCTTAGCGTTTTGGTCGGCCCCGTGTTATTACATACCCTTAACCGAGTTTCCTATGGAAACCTGTCCGGACACATTACTGACTCCACAATGTTGAACAGTTGAATATCGACCATCATAGTAATAGATAACCATAATATAGTTCAAATATTTTTTTACGTATAAGAAAGACGTGGAATGGAGAATACGAACACGACTATAACTTTCTCGTCAAAAAATAGAGAGGAAAGAGGTAATAGTAAGAGGGCGATGATGATAACCATAATCATAGTAAGAAGTTTAGTCACGTGAAATTTGATTGTGTCTCACCCCAATTAAAATGTGTACCGAAATCCGAAACTAGGGTATTTGACTAATATAGGAGTACGTAGACAAGATGGTACAGTAAAAAGGTGTACAATTTTAAAAGACAGTGTAAATGGTTATGATGAAGAACACCCAACCCCAGACACCCATGTGTCCGTACACACCGGGTTTGTAATACATGGAGACATAGTATACGAAATCGTAGACTACGTGTTTTATCTCACCACCAACGAAGGAAGGTGTGTCCATGGGGTATTATCTGACACTGGGTGTTAAAAAGACATCGTGATGTCTAGTAGTTGTAGGGTTCCTCGTACCACGGGGTAGAGGTGGGGGTAGAGGTGTTCACGACTATAAAGAGGAAGTGAGAGTAACGGTGACAGAAGACGAGAAAGATAATCAGATAGTTAATTGGACAGATAAAAAGAAACAGAATTATAAAAGGATATAAGATACTAATGATACCTGGTGTGTTGATAACGATAATAATAACGATGATGATTACGATGATAACGATGATAACCATATCCAACGTAATGTACATGATGAATGACGAAACTATCTCTTCGAACTACTCAGACTGACAAGACTACTCGAGAAGCAGCGACAGAGGCGAAGAAGGACGGTATCCTCTACGGATTCCGAAAACAATACTTTGTTTGAACCGTTACTTTCGTTGTGAAAAATGTTATCGTTAACCATGTTCGTCAAAATCCGACTGAAGGACCTACGAAGGTCCCGAGATCAGATCCTAGATGACCGAGGTAAAGAACGAGAGGAGACAGCTCATTGCGGATAAGACGATACAGCTGTGGGTTAAGACTTTACCTATTTGTCGTCAACAACGTCTTAAGAATAATACCGAAGGTGAGGACGGGTTCATAGGGGTATTCAAAGTATCTATACAACGGGATTCGGTACCTCGGTTTAGGATCCTTTTACAGATTGTCGAAGTAAGAATTCGAGGAGATTTTCGAGATCACAGGTAAGTAACACACCGAGGGAGACACCGGGAACCAGAAGACCCCGAACAAGGTAGATAGGAGACAGTCAAAGCATTGTGATCCGTTTCCACCGAAATAGAAAAAACCACAATAATTACGACGATCACGGTTCATAACATCTCTAGGATGGAACAATACAGGACGAACTATAAGTGTGGATCCCGATTGATACACAGGATTATTCCGGAAAGAATATCGTCTCAGACTTTTTGTCAGTTTCATTATGTCTACTTAATCAACCAGACGATCAAGTCCCAGATGAACACACGATATAGAGAAAAAGGAGGTAAGATACCTCTGAGGGACTGGGTTTACGGTCAGAGAAAGAGGACATACGTCTGGGGTTATACAACAATAATGGTTAGATCGTAGGGGATCACCCTACACATGAAGACTTGAATAAGAACCTACTCCCGAAAGTATCACTACAGATATTTTGGTAGGGGATCGAAAGGGACTTTGTATGTATACCACAAAATGATTTGAAAAGGTACAAGATTAGGAGTAGGACAGATGAACGGTGTGTTAGTAGTGGACGGTAGACAAAAGGTATTAGGGATTACTAGAAACGAAAAGAAGAACCGTGATGAAAATACAGTGATAATAGAACATAATGATGACGGGGAAGTGGAAAGGTCTCCTCGAAACGACCAGGAAAGGTTTCACCTAAAGACGACAGGGACATTATTTGGGCTTTTAAAACTTAAAAACATTAAACAAAAACATTAAGAAATCAAACATACAGACAACGATAATACAGATGATAAGAAAGGGGACGTGACATGGGGGGTTAGGGGGGAAAAGAAAATTTTAACACCTACTTATGACGGTAAACATGACGACAGAATTCTACAAGTCGGACTAGAGAATGGACAGGATATTAAAAGAAATTAAGAAATAAGTATCTAAGATGATGAGGAACTGAAACCCCTAACATCCCTTAAGGTTTAAGGACGAACTAAGGGCGGGTGGTTGTCCGCCGGGATTGGCATCGTGGCCACTTTAACGACGGTAACAGTCATACATAACAAAAATGACCGGTAGAAGGACGATTAAAATTTTCTTTTATACGACAAAGGACGGGACAAAGACGACCTTATTGAAGACGAAGATATATAGGTGACCGATGTACTTGACGATGGTCCTATTGAAAAGGAAGATTTACACATGTTAGATCAACGGTATAAGGACCTGATGTCAGATGAACAGGTACGTACCGAAGAGGAAAATCGACTGTAAATAGTGTCGACCGATGATAAAGAAAACGATGATGTCCACCGTCCAATTTTAGTGATCGGTAACGAGAGGTTAATGACACTATAAAGAGTACAAGTAGAACCCGGAATAGATAAGGTAGATTTTTATCATGAAAGGACTAAGGTCGTGACTGATTAAATAGATGAACAAGTAAAGGAGGTTAAGGAAACACACGACCATGGGTACGGTCTATCTGGAAAAGGAAAAAATAATTGACGAGATAATAAACTAACTGATTGAGACTAAGTGAAACTAGACCAACACGAACTTACTAAGGATTACGTATAACACTCAGACAATGATACAAATGAAGATTAGGGCTTAGGACGTTTCGATCTATTTAACGAACATTGAGTCAGAAGACTAAACAACACAGTCAATCCCACTGTTGAAAAACAGAAGGAGATAATCATTGTATAGGACGAAAAGGATTAAATCAGAGGGACAATCGACGGGGTAGATGTATCTTCCAAAGACGAGGATGATACCCAAGAAAGAGATTGACCATGGTATTAAAGTGATTCCCTCCCCATAATTGTTTGAGGGTGAGTCCTTAGGTCCACCGAACGGTTATGAGACAGGTGGTACAAAGGGTACAAAGGAAAACATACCCGTCAAATTTAAATCCTCAGAAAGGGGTATAATGATACGAAAGACACCAATAAAAAACGTGACGGAGACAATTAACAAAATGTAGTAATCACACCCGTGGGGAGTAAGAACGTATAAAAGGACAAAAGTCTAAAAATTTACCGAGAACTATTTAAACTATACAGGTAACCGGAACGGGGACGAAGACATAAAGACGATAATTCAGAAAACTACCCAGTATTATGTGAGGTACATGACCAAGAAAATCTTAGAGAGACAAAAGACGGTCAAGATCGAGACGAAGAAGACAATCACCATAATGAAGACAATCACGAAACCAAGGAGATTCCTCAAATGTATTAACGGAATGAAATTAGGGACCCATTTAGACTGAACGGGTTAAGTTAAAGGGGTGATTGAAGACATACAGTAACTGTCAGGTCGACAGAAAAAGACCGTCGTGATATCCGACATGACAGGTAAATAGTCCTACCTCAAGTATTGGGTAGGTTTCCTTACCTCCAAGAAAGACTACAAAAAACAGACCACACCATTCAGGGGTGGAGTTGTCTACAACAGAGTCGAGGAGATAAAAACAAGATACGACGGGATAAAGATTCAGTCTAGGATGTATGTTTAGTAGGTACATAACTATCTATTGATACAGACCTAAAACAAAAGATTTTCCGAGATTCTAAAAACAGTACGATGAAACCTTATAACGACCACTAGGAAAGGTAGGGACACCTTCGTGTAACATGACTATAGATTAGGGACCACAGAGTAACAAATATGATCCATACCATTTACGTCATATGAAGGACTTCAGAAGTAGATTCCCTTGACTTTTTATACGTAGTGGGTGTAGGTCATGACAATGACTAAAAAAGAAAAAATTGGGACGCCCTACACCATAAGGATTAACTTGAAGGGTCTTCAGAACTCAAGAGAATAATTCAAGAGACTTTAGATGATTAAAAGAGGTAAATCATGACAGAAAAAAGAAATACCGTTTATGACCTCATAACATACCTAAAAGTCCGGGTTAAAAACTTTAAAAGGGAAGGAAAAGGTAGAGACATGTTTAAAGATGATTACGAAAATAAAAAAGAAGACAGTTACCGGTAACAAATTGAAAACCCGGTAGGTAAGGACCGAAATTAAAATGACCATGTCAGAGTTATCCCGATTACCCTTTTAAATTTCACGTTGGTTAGACTCAGTTGTCTAAAGAAGGTTAATACAACTGTCCACATCCAGGATGATTATGACATGGATATCGAAATACAGGTGTCTAAAGATACTCATAGACTAGTATGACAGAATGAAACTATTTTGGAGGTTAAGGGGGATAGTAAAAACCAAAGGTAGAAGGACCGTTTGAGTAAAGAAGATTATGACATAGTAGACGAGGACATAGATTATCTCGAAGGAAATCAACGGGGGGATAGAAATAACACTGCTCCCCAGCAACGGTTTCTCACTGGACTCCCTTCAATTTCCTATGTCAAGGAACAGATAGCCGAGGACGAAGACTCCCCCTCAACAACAGAGATGGGGTCTGGACTTCGAGAGAAGACCACCCCGACAACCGAGACCAGACGAGACTTCTTTTAAGGGACCGGAAGGGAACATCCTTCCGGTCTAGAAGGGATTTTTTAATCGGACAGAGAGTCATGTTAGAAAGTAAACCACAGGAAGGAAAGGTGTAAAGGTTGTCGGGAAAAAGGATCCCCGGGACGTTAAAGACCGACACACGGGAAGAAACGGTGTTAACTTTGTGAATTGTTAGAAAGAAACCAAGGATTTTAACGGAGAGACGTAGTAATACCATCGACTTAAACAATGAACCGAGTAACGAAGTCGGTTTTGAGAACGGAATACCGGCCCAGGAGGATGAGGGACTGTACGACAGTAGTAAAGAAGATCACATCGGCGACCAGGGTTACGAAAATTTTATCAGAATGTTAGACCCAAGCGTAAAACCTGGTTGTTCCAAAGACAGTAGGTTAAAAAATGGAGGACACTTCGAACGAGCCGAGAATCTCAAAATATCTTGGCCAGATGTATCAGAGATTTCCCAAGGAAACCAGGAACAGAATACAGGTCTTACGACCATCCCGATATGTAAGAATGATAAAATAAATTAGGGTCCTAATAGGTAGAAAATATTTAAAGAGGATGACCCTATCCACCTAATAAACAGTAGGTAGGATAAACAAGGACTTCCCATGATCATCAAGGACGATACAGTGAAGGGGAACCAAGAGAGTAGACCGGACCACGTTATCCGGGACGTACGTGACCTACGTGAGATAGGGTAAGACGTCGAAGGAGTAACTACCAGAGAAAATTGTAAACGTACCGACGAACTACAGGGGGGTGACACAAATCGTACCACAAATTTAGAACACCCCACCGAGGAAGACTATTACGACTTTTGTACCCATAGTGAAGACCCGACTTTCGGAAGAGAAGATGATGAAAATGGGTACGTAAATTTCAAGATCCACTATACCGGACTACATGGTAAACGGGGACCTACAAGACGTGATATCCCATTAAAACCGACTGGACTAACGACACAGGACACAGTCGACGACGAACGACACGAAAAAAGAATGAAAACAAAACGAGAAGGAGATAGAACAGATTTCGAAGGAACCACAGAAAATAGAGATAGGAAACTACGTGTGTTATCTCCCAACGATGACATAATATATTACTAGATTCAAGAAGACTAGGACAGACTTCCCTACCAACATCGACAGGGTCATAAACAGATGTCGGAAGACTACAAAGATTGTCCGGTCCTAATTGACGCTTAGCAAGATCGAGGGACGAACGGGTATGATATACAAAATTAAATATAAAAAAGAAAGGGGGACCGGAATTGGCTTAAAAAAGGGTAGCTAGATTAAGAGGGGGCGAATTATGACTGCGAGAGCGTGGGTAGAGAGAGGAAGATCGGAGGCGATCAGTTTTAAAAACCGCATGAGTGGTCAGCGGCGGGGAGCGGAGAACGGCACGCGCGAAGTCGTTCGGCTCAGGACGCAGCTCTCTCGAGGAGACCAAAGGGAAAGCGAAAGTCCAGGGACAAGCCCGCGGTGACGATCTCTAAAAGGTGTGACTGATTTTCCCAGACTCCCTAGAGATCAATGGTCTCAGTGTGTTGTCTGCCCGTGTGTGATGAACTTCGTGAGTTCCGTTCGAAATAACTCCGAATTCGTCACCCAAGGGATCAATCGGTCTCTCGAGGGTCCGAGTCTAGACCAGATTGGTCTCTCTGGGTCATGTCCGTTTTTCGTCGACGAATATACGTCCTAGACTCCCGAGCGGTGAGGGGTCAGGGCGGGTCCGGTGCGGAGGGACCTTTCAGGGGTCGCCTTTCAGGGAACATCGTTCGAGCTACAGTCGTCAAGAACTTCATGAGGCCTACGTCGAGAGCCCGGTACACTACTTTACGATCCGCCGACAGTTTGGAGGTGAGATTGTGAAGAGAGAGGCCCAGTAGGTAAGGTACGTCCGAGTGTCCCACATTGTTCGACCACAAGAGAGGAAACAACCGAAGAAGATTGAAGAGACCGAGTTGACCATGATCGAACATCGTGGTAGGTTTCCAGTCACCTATAGACTAGGGACCGGGACCACACATCAAGACGATTAGTCCCTTCATCGGAACACACACCATCTAGGTGTCTAGTTCCTATAGAACAGAAGCAACCCTCACTTAATCGGGAAGGT >Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS TGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTXXXXXXXAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCACGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATCCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTATTGGCCTCTTCTATCTTATCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGACCCCTGGCCCTGGTGTGTAGTTCTGCCAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCTTTGGGAGTGAATTAGCCCTTCCAGTCCCCCCTTTTCTTTTAAAAAGTGGCTAAGATCTACAGCTGCCTTGTAAGTCATTGGTCTTAAAGGTACCTGAGGTGTGACTGGAAAACCCACCTCCTCCTCCTCTTGTGCTTCTAGCCAGGCACAAGCAGCATTGGTAGCTGCTGTATTGCTACTTGTGATTGCTCCATGTTTTTCCAGGTCTCGAGATGCTGCTCCCACCCTATCTGCTGCTGGCTCAGCTCGTCTCATTCTTTCCCTTACAGTAGGCCATCCAATCACACTACTTTTTGACCACTTGCCACCCATCTTATAGCAAAATCCTTTCCAAGCCCTGTCTTATTCTTCTAGGTATGTGGCGAATAGCTCTACAAGCTCCTTGTACTACTTCTATAACCCTATCTGTCCCCTCAGCTACTGCTATGGCTGTGGCATTGAGCAAGCTAACAGCACTATTCTTTAGTTCCTGACTCCAATACTGTAGGAGATTCCACCAATATTTGAGGGCTTCCCACCCCCTGCGTCCCAGAAGTTCCACAATCCTCGTTACAATCAAGAGTAAGTCTCTCAAGCGGTGGTAGCTGAAGAGGCACAGGCTCCGCAGATCGTCCCAGATAAGTGCCAAGGATCCGTTCACTAATCGAATGGATCTGTCTCTGTCTCTCTCTCCACCTTCTTCTTCGGTTCCTTCGGGCCTGTCGGGTCCCCTCGGGGTTGGGAGGTGGGTCTGAAACGATAATGGTGAATATCCCTGCCTAACTCTATTCACTATAGAAAGTACAGCAAAAACTATTCTTAAACCTACCAAGCCTCCTACTATCATTATGAATAATTTTATATACCACAGCCAATTTGTTATGTTAAACCAATTCCACAAACTTGCCCATTTATCTAATTCCAATAATTCTTGTTCATTCTTTTCTTGCTGGTTTTGCGATTCTTCAATTAAGGAGTGTATTAAGCTTGTGTAATTGTTAATTTCTCTGTCCCACTCCATCCAGGTCGTGTGATTCCAAATCTGTTCCAGAGATTTATTACTCCAACTAGCATTCCAAGGCACAGCAGTGGTGCAAATGAGTTTTCCAGAGCAACCCCAAATCCCCAGGAGCTGTTGATCCTTTAGGTATCTTTCCACAGCCAGGATTCTTGCCTGGAGCTGCTTGATGCCCCAGACTGTGAGTTGCAACAGATGCTGTTGCGCCTCAATAGCCCTCAGCAAATTGTTCTGCTGCTGCACTATACCAGACAATAATTGTCTGGCCTGTACCGTCAGCGTCATTGAGGCTGCGCCCATAGTGCTTCCTGCTGCTCCCAAGAACCCAAGGAACAAAGCTCCTATTCCCACTGCTCTTTTTTCTCTCTGCACCACTCTTCTCTTTGCCTTGGTGGGTGCTACTCCTAATGGTTCAATTTTTACTACTTTATATTTATATAATTCACTTCTCCAATTGTCCCTCATATCTCCTCCTCCAGGTCTGAAGATCTCGGACTCATTGTTGCTATTACCACCATCTCTTGTTAATAGCAGCCCTGTAATATTTGATGAACATCTAATTTGTCCACTGATGGGAGGGGCATACATTGCTTTTCCTACTTTCTGCCACATGTTTATAATTTGTTTTATTCTGCATGGGAGGGTGATTGTGTCACTTCCTTCAGTGTTATTTGACCCTTCAGTACTCCAAGTACTATTAAACCAAGTACTATTAAACAGTTGTGTTGAATTACAGTAGAAAAATTCCCCTCCACAATTAAAACTGTGCGTTACAATTTCTGGGTCCCCTCCTGAGGATTGCTTAAAGATTATTGTTTTATTATTTCCAAATTGTTCTCTTAATTTGCTAGCTATCTGTTTTAAAGTGTTATTCCATTTTGCTCTACTAATGTTACAATGTGCTTGTCTCATATTTCCTATTTTTCCTATTGTAACAAATGCTCTCCCTGGTCCTCTCTGGATACGGATTCTTTTTCTTGTATTGTTGTTGGGTCTTGTACAATTAATTTCTACAGATGTGTTCAGCTGTACTATTATGGTTTTAGCATTGTCCGTGAAATTGACAGATCTAATTACTACCTCTTCTTCTGCTAGACTGCCATTTAACAGCAGTTGAGTTGATACTACTGGCCTAATTCCATGTGTACATTGTACTGTGCTGACATTTGTACATGGTCCTGTTCCATTGAACGTCTTATTATTACATTTTAGAATCGCAAAACCAGCCGGGGCACAATAATGTATGGGAATTGGCTCAAAGGATACCTTTGGACAGGCCTGTGTAATGACTGAGGTGTTACAACTTGTCAACTTATAGCTGGTAGTATCATTATCTATTGGTATTATATCAAGTTTATAAAAAAATGCATATTCTTTCTGCACCTTACCTCTTATGCTTGTGCTGATATTGAAAGAGCAGTTTTTTATCTCTCCTTTCTCCATTATCATTCTCCCGCTACTACTATTGGTATTAGTATCATTCTTCAAATCAGTGCACTTTAAACTAACACAGAGTGGGGTTAATTTTACACATGGCTTTAGGCTTTGATCCCATAAACTGATTATATCCTCATGCATCTGTTCTACCATGTCATTTTTCCACATGTTAAAATTTTCTGTCACATTTACCAATACTACTTCTTGTGGGTTGGGGTCTGTGGGTACACAGGCATGTGTGGCCCAAACATTATGTACCTCTGTATCATATGCTTTAGCATCTGATGCACAAAATAGAGTGGTGGTTGCTTCCTTCCACACAGGTACCCCATAATAGACTGTGACCCACAATTTTTCTGTAGCACTACAGATCATCAACATCCCAAGGAGCATGGTGCCCCATCTCCACCCCCATCTCCACAAGTGCTGATATTTCTCCTTCACTCTCATTGCCACTGTCTTCTGCTCTTTCTATTAGTCTATCAATTAACCTGTCTATTTTTCTTTGTCTTAATATTTTCCTATATTCTATGATTACTATGGACCACACAACTATTGCTATTATTATTGCTACTACTAATGCTACTATTGCTACTATTGGTATAGGTTGCATTACATGTACTACTTACTGCTTTGATAGAGAAGCTTGATGAGTCTGACTGTTCTGATGAGCTCTTCGTCGCTGTCTCCGCTTCTTCCTGCCATAGGAGATGCCTAAGGCTTTTGTTATGAAACAAACTTGGCAATGAAAGCAACACTTTTTACAATAGCAATTGGTACAAGCAGTTTTAGGCTGACTTCCTGGATGCTTCCAGGGCTCTAGTCTAGGATCTACTGGCTCCATTTCTTGCTCTCCTCTGTCGAGTAACGCCTATTCTGCTATGTCGACACCCAATTCTGAAATGGATAAACAGCAGTTGTTGCAGAATTCTTATTATGGCTTCCACTCCTGCCCAAGTATCCCCATAAGTTTCATAGATATGTTGCCCTAAGCCATGGAGCCAAATCCTAGGAAAATGTCTAACAGCTTCATTCTTAAGCTCCTCTAAAAGCTCTAGTGTCCATTCATTGTGTGGCTCCCTCTGTGGCCCTTGGTCTTCTGGGGCTTGTTCCATCTATCCTCTGTCAGTTTCGTAACACTAGGCAAAGGTGGCTTTATCTTTTTTGGTGTTATTAATGCTGCTAGTGCCAAGTATTGTAGAGATCCTACCTTGTTATGTCCTGCTTGATATTCACACCTAGGGCTAACTATGTGTCCTAATAAGGCCTTTCTTATAGCAGAGTCTGAAAAACAGTCAAAGTAATACAGATGAATTAGTTGGTCTGCTAGTTCAGGGTCTACTTGTGTGCTATATCTCTTTTTCCTCCATTCTATGGAGACTCCCTGACCCAAATGCCAGTCTCTTTCTCCTGTATGCAGACCCCAATATGTTGTTATTACCAATCTAGCATCCCCTAGTGGGATGTGTACTTCTGAACTTATTCTTGGATGAGGGCTTTCATAGTGATGTCTATAAAACCATCCCCTAGCTTTCCCTGAAACATACATATGGTGTTTTACTAAACTTTTCCATGTTCTAATCCTCATCCTGTCTACTTGCCACACAATCATCACCTGCCATCTGTTTTCCATAATCCCTAATGATCTTTGCTTTTCTTCTTGGCACTACTTTTATGTCACTATTATCTTGTATTACTACTGCCCCTTCACCTTTCCAGAGGAGCTTTGCTGGTCCTTTCCAAAGTGGATTTCTGCTGTCCCTGTAATAAACCCGAAAATTTTGAATTTTTGTAATTTGTTTTTGTAATTCTTTAGTTTGTATGTCTGTTGCTATTATGTCTACTATTCTTTCCCCTGCACTGTACCCCCCAATCCCCCCTTTTCTTTTAAAATTGTGGATGAATACTGCCATTTGTACTGCTGTCTTAAGATGTTCAGCCTGATCTCTTACCTGTCCTATAATTTTCTTTAATTCTTTATTCATAGATTCTACTACTCCTTGACTTTGGGGATTGTAGGGAATTCCAAATTCCTGCTTGATTCCCGCCCACCAACAGGCGGCCCTAACCGTAGCACCGGTGAAATTGCTGCCATTGTCAGTATGTATTGTTTTTACTGGCCATCTTCCTGCTAATTTTAAAAGAAAATATGCTGTTTCCTGCCCTGTTTCTGCTGGAATAACTTCTGCTTCTATATATCCACTGGCTACATGAACTGCTACCAGGATAACTTTTCCTTCTAAATGTGTACAATCTAGTTGCCATATTCCTGGACTACAGTCTACTTGTCCATGCATGGCTTCTCCTTTTAGCTGACATTTATCACAGCTGGCTACTATTTCTTTTGCTACTACAGGTGGCAGGTTAAAATCACTAGCCATTGCTCTCCAATTACTGTGATATTTCTCATGTTCATCTTGGGCCTTATCTATTCCATCTAAAAATAGTACTTTCCTGATTCCAGCACTGACTAATTTATCTACTTGTTCATTTCCTCCAATTCCTTTGTGTGCTGGTACCCATGCCAGATAGACCTTTTCCTTTTTTATTAACTGCTCTATTATTTGATTGACTAACTCTGATTCACTTTGATCTGGTTGTGCTTGAATGATTCCTAATGCATATTGTGAGTCTGTTACTATGTTTACTTCTAATCCCGAATCCTGCAAAGCTAGATAAATTGCTTGTAACTCAGTCTTCTGATTTGTTGTGTCAGTTAGGGTGACAACTTTTTGTCTTCCTCTATTAGTAACATATCCTGCTTTTCCTAATTTAGTCTCCCTGTTAGCTGCCCCATCTACATAGAAGGTTTCTGCTCCTACTATGGGTTCTTTCTCTAACTGGTACCATAATTTCACTAAGGGAGGGGTATTAACAAACTCCCACTCAGGAATCCAGGTGGCTTGCCAATACTCTGTCCACCATGTTTCCCATGTTTCCTTTTGTATGGGCAGTTTAAATTTAGGAGTCTTTCCCCATATTACTATGCTTTCTGTGGTTATTTTTTGCACTGCCTCTGTTAATTGTTTTACATCATTAGTGTGGGCACCCCTCATTCTTGCATATTTTCCTGTTTTCAGATTTTTAAATGGCTCTTGATAAATTTGATATGTCCATTGGCCTTGCCCCTGCTTCTGTATTTCTGCTATTAAGTCTTTTGATGGGTCATAATACACTCCATGTACTGGTTCTTTTAGAATCTCTCTGTTTTCTGCCAGTTCTAGCTCTGCTTCTTCTGTTAGTGGTATTACTTCTGTTAGTGCTTTGGTTCCTCTAAGGAGTTTACATAATTGCCTTACTTTAATCCCTGGGTAAATCTGACTTGCCCAATTCAATTTCCCCACTAACTTCTGTATGTCATTGACAGTCCAGCTGTCTTTTTCTGGCAGCACTATAGGCTGTACTGTCCATTTATCAGGATGGAGTTCATAACCCATCCAAAGGAATGGAGGTTCTTTCTGATGTTTTTTGTCTGGTGTGGTAAGTCCCCACCTCAACAGATGTTGTCTCAGCTCCTCTATTTTTGTTCTATGCTGCCCTATTTCTAAGTCAGATCCTACATACAAATCATCCATGTATTGATAGATAACTATGTCTGGATTTTGTTTTCTAAAAGGCTCTAAGATTTTTGTCATGCTACTTTGGAATATTGCTGGTGATCCTTTCCATCCCTGTGGAAGCACATTGTACTGATATCTAATCCCTGGTGTCTCATTGTTTATACTAGGTATGGTAAATGCAGTATACTTCCTGAAGTCTTCATCTAAGGGAACTGAAAAATATGCATCACCCACATCCAGTACTGTTACTGATTTTTTCTTTTTTAACCCTGCGGGATGTGGTATTCCTAATTGAACTTCCCAGAAGTCTTGAGTTCTCTTATTAAGTTCTCTGAAATCTACTAATTTTCTCCATTTAGTACTGTCTTTTTTCTTTATGGCAAATACTGGAGTATTGTATGGATTTTCAGGCCCAATTTTTGAAATTTTCCCTTCCTTTTCCATCTCTGTACAAATTTCTACTAATGCTTTTATTTTTTCTTCTGTCAATGGCCATTGTTTAACTTTTGGGCCATCCATTCCTGGCTTTAATTTTACTGGTACAGTCTCAATAGGGCTAATGGGAAAATTTAAAGTGCAACCAATCTGAGTCAACAGATTTCTTCCAATTATGTTGACAGGTGTAGGTCCTACTAATACTGTACCTATAGCTTTATGTCCACAGATTTCTATGAGTATCTGATCATACTGTCTTACTTTGATAAAACCTCCAATTCCCCCTATCATTTTTGGTTTCCATCTTCCTGGCAAACTCATTTCTTCTAATACTGTATCATCTGCTCCTGTATCTAATAGAGCTTCCTTTAGTTGCCCCCCTATCTTTATTGTGACGAGGGGTCGTTGCCAAAGAGTGACCTGAGGGAAGTTAAAGGATACAGTTCCTTGTCTATCGGCTCCTGCTTCTGAGGGGGAGTTGTTGTCTCTACCCCAGACCTGAAGCTCTCTTCTGGTGGGGCTGTTGGCTCTGGTCTGCTCTGAAGAAAATTCCCTGGCCTTCCCTTGTAGGAAGGCCAGATCTTCCCTAAAAAATTAGCCTGTCTCTCAGTACAATCTTTCATTTGGTGTCCTTCCTTTCCACATTTCCAACAGCCCTTTTTCCTAGGGGCCCTGCAATTTCTGGCTGTGTGCCCTTCTTTGCCACAATTGAAACACTTAACAATCTTTCTTTGGTTCCTAAAATTGCCTCTCTGCATCATTATGGTAGCTGAATTTGTTACTTGGCTCATTGCTTCAGCCAAAACTCTTGCCTTATGGCCGGGTCCTCCTACTCCCTGACATGCTGTCATCATTTCTTCTAGTGTAGCCGCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTCGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACCTCCTGTGAAGCTTGCTCGGCTCTTAGAGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTCTTATGTCCAGAATGCTGGTAGGGCTATACATTCTTACTATTTTATTTAATCCCAGGATTATCCATCTTTTATAAATTTCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATGTCACTTCCCCTTGGTTCTCTCATCTGGCCTGGTGCAATAGGCCCTGCATGCACTGGATGCACTCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAACATTTGCATGGCTGCTTGATGTCCCCCCACTGTGTTTAGCATGGTGTTTAAATCTTGTGGGGTGGCTCCTTCTGATAATGCTGAAAACATGGGTATCACTTCTGGGCTGAAAGCCTTCTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGATATGGCCTGATGTACCATTTGCCCCTGGATGTTCTGCACTATAGGGTAATTTTGGCTGACCTGATTGCTGTGTCCTGTGTCAGCTGCTGCTTGCTGTGCTTTTTTCTTACTTTTGTTTTGCTCTTCCTCTATCTTGTCTAAAGCTTCCTTGGTGTCTTTTATCTCTATCCTTTGATGCACACAATAGAGGGTTGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTATATGTTTTAATTTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTTTCCCATCGATCTAATTCTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAATTTTTGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCGAGTCCTGCGTCGAGAGAGCTCCTCTGGTTTCCCTTTCGCTTTCAGGTCCCTGTTCGGGCGCCACTGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCATGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATTCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTGTTGGCTTCTTCTAACTTCTCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGATCCCTGGCCCTGGTGTGTAGTTCTGCTAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCGTTGGGAGTGAATTAGCCCTTCCA >small-sequence-with-xs diff --git a/tests/expected-results-edgy/errors.json b/tests/expected-results-edgy/errors.json index ff94b05..1cd3366 100644 --- a/tests/expected-results-edgy/errors.json +++ b/tests/expected-results-edgy/errors.json @@ -318,19 +318,86 @@ "message": "Query sequence contains a long deletion." } ], - "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]": [ + "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT]": [ { - "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]", + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT]", "error": "DeletionInOrf", "message": "Smaller ORF nef at 8008-8682 can have maximum deletions 30, got 54" }, { - "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]", + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT]", "error": "MajorSpliceDonorSiteMutated", "message": "Query sequence has a mutated splice donor site, G." }, { - "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]", + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT]", + "error": "NonHIV", + "message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence." + } + ], + "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": [ + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "ORF gag at 1-1497 contains an internal stop codon at 58" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "ORF pol at 1290-4301 contains an internal stop codon at 1386" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "ORF env at 5430-8006 contains an internal stop codon at 5556" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "Smaller ORF vif at 4246-4821 contains an internal stop codon at 4315" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpr at 4764-5051 contains an internal stop codon at 4821" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon1 at 5032-5247 contains an internal stop codon at 5083" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 5267-5512 contains an internal stop codon at 5318" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "FrameshiftInOrf", + "message": "Smaller ORF tat_exon2 at 7567-7662 contains out of frame indels that impact 71 positions." + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InsertionInOrf", + "message": "Smaller ORF rev_exon2 at 7568-7864 can have maximum insertions 90, got 108" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "InternalStopInOrf", + "message": "Smaller ORF nef at 8008-8682 contains an internal stop codon at 8050" + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "RevResponseElementDeletion", + "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 131 deletions with max tolerance of 20 deletions." + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", + "error": "MajorSpliceDonorSiteMutated", + "message": "Query sequence has a mutated splice donor site, A." + }, + { + "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED", "error": "NonHIV", "message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence." } diff --git a/tests/expected-results-edgy/holistic.json b/tests/expected-results-edgy/holistic.json index ad3e2c4..f365f8f 100644 --- a/tests/expected-results-edgy/holistic.json +++ b/tests/expected-results-edgy/holistic.json @@ -59,7 +59,7 @@ "orfs_end": 8006, "blast_n_conseqs": 0 }, - "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]": { + "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT]": { "qlen": 9718, "hypermutation_probablility": 0.13527282947774355, "inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441", @@ -71,6 +71,18 @@ "orfs_end": 8006, "blast_n_conseqs": 0 }, + "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": { + "qlen": 9718, + "hypermutation_probablility": 7.126591271466864e-05, + "inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441", + "blast_matched_qlen": 1, + "blast_sseq_coverage": 0.0, + "blast_qseq_coverage": 0.0, + "blast_sseq_orfs_coverage": 0.0, + "orfs_start": 1, + "orfs_end": 8006, + "blast_n_conseqs": 0 + }, "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]": { "qlen": 9711, "hypermutation_probablility": 0.13527282947774355, diff --git a/tests/expected-results-edgy/nonintact.fasta b/tests/expected-results-edgy/nonintact.fasta index 982acdf..13a0da0 100644 --- a/tests/expected-results-edgy/nonintact.fasta +++ b/tests/expected-results-edgy/nonintact.fasta @@ -4,7 +4,7 @@ GACTCTGGGAGTGAGAGAT A >empty-sequence >empty-sequence2 ->Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT] +>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT] TGGAAGGGCTAATTCACTCCCAACGAAGACAAGATATCCTTGATCTGTGGATCTACCACA CACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGATCAGATATCCAC TGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGAGAAGTTAGAAGAAGCCA @@ -167,6 +167,169 @@ CTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGG TCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTG CTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGT GACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCA +>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED +ACGATCTCTAAAAGGTGTGACTGATTTTCCCAGACTCCCTAGAGATCAATGGTCTCAGTG +TGTTGTCTGCCCGTGTGTGATGAACTTCGTGAGTTCCGTTCGAAATAACTCCGAATTCGT +CACCCAAGGGATCAATCGGTCTCTCGAGGGTCCGAGTCTAGACCAGATTGGTCTCTCTGG +GTCATGTCCGTTTTTCGTCGACGAATATACGTCCTAGACTCCCGAGCGGTGAGGGGTCAG +GGCGGGTCCGGTGCGGAGGGACCTTTCAGGGGTCGCCTTTCAGGGAACATCGTTCGAGCT +ACAGTCGTCAAGAACTTCATGAGGCCTACGTCGAGAGCCCGGTGCACTACTTTACGATCC +GCCGACAGTTTGGAGGTGAGATTGTGAAGAGAGAGGCCCAGTAGGTAGGGTACGTCCGAG +TGTCCCACATTGTTCGACCACAAGAGAGGAAATAACCGGAGAAGATAGAATAGACCGAGT +TGACCATGATCGAACATCGTGGTAGGTTTCCAGTCACCTATAGACTGGGGACCGGGACCA +CACATCAAGACGGTTAGTCCCTTCATCGGAACACACACCATCTAGGTGTCTAGTTCCTAT +AGAACAGAAGAAACCCTCACTTAATCGGGAAGGTCAGGGGGGAAAAGAAAATTTTTCACC +GATTCTAGATGTCGACGGAACATTCAGTAACCAGAATTTCCATGGACTCCACACTGACCT +TTTGGGTGGAGGAGGAGGAGAACACGAAGATCGGTCCGTGTTCGTCGTAACCATCGACGA +CATAACGATGAACACTAACGAGGTACAAAAAGGTCCAGAGCTCTACGACGAGGGTGGGAT +AGACGACGACCGAGTCGAGCAGAGTAAGAAAGGGAATGTCATCCGGTAGGTTAGTGTGAT +GAAAAACTGGTGAACGGTGGGTAGAATATCGTTTTAGGAAAGGTTCGGGACAGAATAAGA +AGATCCATACACCGCTTATCGAGATGTTCGAGGAACATGATGAAGATATTGGGATAGACA +GGGGAGTCGATGACGATACCGACACCGTAACTCGTTCGATTGTCGTGATAAGAAATCAAG +GACTGAGGTTATGACATCCTCTAAGGTGGTTATAAACTCCCGAAGGGTGGGGGACGCAGG +GTCTTCAAGGTGTTAGGAGCAATGTTAGTTCTCATTCAGAGAGTTCGCCACCATCGACTT +CTCCGTGTCCGAGGCGTCTAGCAGGGTCTATTCACGGTTCCTAGGCAAGTGATTAGCTTA +CCTAGACAGAGACAGAGAGAGAGGTGGAAGAAGAAGCCAAGGAAGCCCGGACAGCCCAGG +GGAGCCCCAACCCTCCACCCAGACTTTGCTATTACCACTTATAGGGACGGATTGAGATAA +GTGATATCTTTCATGTCGTTTTTGATAAGAATTTGGATGGTTCGGAGGATGATAGTAATA +CTTATTAAAATATATGGTGTCGGTTAAACAATACAATTTGGTTAAGGTGTTTGAACGGGT +AAATAGATTAAGGTTATTAAGAACAAGTAAGAAAAGAACGACCAAAACGCTAAGAAGTTA +ATTCCTCACATAATTCGAACACATTAACAATTAAAGAGACAGGGTGAGGTAGGTCCAGCA +CACTAAGGTTTAGACAAGGTCTCTAAATAATGAGGTTGATCGTAAGGTTCCGTGTCGTCA +CCACGTTTACTCAAAAGGTCTCGTTGGGGTTTAGGGGTCCTCGACAACTAGGAAATCCAT +AGAAAGGTGTCGGTCCTAAGAACGGACCTCGACGAACTACGGGGTCTGACACTCAACGTT +GTCTACGACAACGCGGAGTTATCGGGAGTCGTTTAACAAGACGACGACGTGATATGGTCT +GTTATTAACAGACCGGACATGGCAGTCGCAGTAACTCCGACGCGGGTATCACGAAGGACG +ACGAGGGTTCTTGGGTTCCTTGTTTCGAGGATAAGGGTGACGAGAAAAAAGAGAGACGTG +GTGAGAAGAGAAACGGAACCACCCACGATGAGGATTACCAAGTTAAAAATGATGAAATAT +AAATATATTAAGTGAAGAGGTTAACAGGGAGTATAGAGGAGGAGGTCCAGACTTCTAGAG +CCTGAGTAACAACGATAATGGTGGTAGAGAACAATTATCGTCGGGACATTATAAACTACT +TGTAGATTAAACAGGTGACTACCCTCCCCGTATGTAACGAAAAGGATGAAAGACGGTGTA +CAAATATTAAACAAAATAAGACGTACCCTCCCACTAACACAGTGAAGGAAGTCACAATAA +ACTGGGAAGTCATGAGGTTCATGATAATTTGGTTCATGATAATTTGTCAACACAACTTAA +TGTCATCTTTTTAAGGGGAGGTGTTAATTTTGACACGCAATGTTAAAGACCCAGGGGAGG +ACTCCTAACGAATTTCTAATAACAAAATAATAAAGGTTTAACAAGAGAATTAAACGATCG +ATAGACAAAATTTCACAATAAGGTAAAACGAGATGATTACAATGTTACACGAACAGAGTA +TAAAGGATAAAAAGGATAACATTGTTTACGAGAGGGACCAGGAGAGACCTATGCCTAAGA +AAAAGAACATAACAACAACCCAGAACATGTTAATTAAAGATGTCTACACAAGTCGACATG +ATAATACCAAAATCGTAACAGGCACTTTAACTGTCTAGATTAATGATGGAGAAGAAGACG +ATCTGACGGTAAATTGTCGTCAACTCAACTATGATGACCGGATTAAGGTACACATGTAAC +ATGACACGACTGTAAACATGTACCAGGACAAGGTAACTTGCAGAATAATAATGTAAAATC +TTAGCGTTTTGGTCGGCCCCGTGTTATTACATACCCTTAACCGAGTTTCCTATGGAAACC +TGTCCGGACACATTACTGACTCCACAATGTTGAACAGTTGAATATCGACCATCATAGTAA +TAGATAACCATAATATAGTTCAAATATTTTTTTACGTATAAGAAAGACGTGGAATGGAGA +ATACGAACACGACTATAACTTTCTCGTCAAAAAATAGAGAGGAAAGAGGTAATAGTAAGA +GGGCGATGATGATAACCATAATCATAGTAAGAAGTTTAGTCACGTGAAATTTGATTGTGT +CTCACCCCAATTAAAATGTGTACCGAAATCCGAAACTAGGGTATTTGACTAATATAGGAG +TACGTAGACAAGATGGTACAGTAAAAAGGTGTACAATTTTAAAAGACAGTGTAAATGGTT +ATGATGAAGAACACCCAACCCCAGACACCCATGTGTCCGTACACACCGGGTTTGTAATAC +ATGGAGACATAGTATACGAAATCGTAGACTACGTGTTTTATCTCACCACCAACGAAGGAA +GGTGTGTCCATGGGGTATTATCTGACACTGGGTGTTAAAAAGACATCGTGATGTCTAGTA +GTTGTAGGGTTCCTCGTACCACGGGGTAGAGGTGGGGGTAGAGGTGTTCACGACTATAAA +GAGGAAGTGAGAGTAACGGTGACAGAAGACGAGAAAGATAATCAGATAGTTAATTGGACA +GATAAAAAGAAACAGAATTATAAAAGGATATAAGATACTAATGATACCTGGTGTGTTGAT +AACGATAATAATAACGATGATGATTACGATGATAACGATGATAACCATATCCAACGTAAT +GTACATGATGAATGACGAAACTATCTCTTCGAACTACTCAGACTGACAAGACTACTCGAG +AAGCAGCGACAGAGGCGAAGAAGGACGGTATCCTCTACGGATTCCGAAAACAATACTTTG +TTTGAACCGTTACTTTCGTTGTGAAAAATGTTATCGTTAACCATGTTCGTCAAAATCCGA +CTGAAGGACCTACGAAGGTCCCGAGATCAGATCCTAGATGACCGAGGTAAAGAACGAGAG +GAGACAGCTCATTGCGGATAAGACGATACAGCTGTGGGTTAAGACTTTACCTATTTGTCG +TCAACAACGTCTTAAGAATAATACCGAAGGTGAGGACGGGTTCATAGGGGTATTCAAAGT +ATCTATACAACGGGATTCGGTACCTCGGTTTAGGATCCTTTTACAGATTGTCGAAGTAAG +AATTCGAGGAGATTTTCGAGATCACAGGTAAGTAACACACCGAGGGAGACACCGGGAACC +AGAAGACCCCGAACAAGGTAGATAGGAGACAGTCAAAGCATTGTGATCCGTTTCCACCGA +AATAGAAAAAACCACAATAATTACGACGATCACGGTTCATAACATCTCTAGGATGGAACA +ATACAGGACGAACTATAAGTGTGGATCCCGATTGATACACAGGATTATTCCGGAAAGAAT +ATCGTCTCAGACTTTTTGTCAGTTTCATTATGTCTACTTAATCAACCAGACGATCAAGTC +CCAGATGAACACACGATATAGAGAAAAAGGAGGTAAGATACCTCTGAGGGACTGGGTTTA +CGGTCAGAGAAAGAGGACATACGTCTGGGGTTATACAACAATAATGGTTAGATCGTAGGG +GATCACCCTACACATGAAGACTTGAATAAGAACCTACTCCCGAAAGTATCACTACAGATA +TTTTGGTAGGGGATCGAAAGGGACTTTGTATGTATACCACAAAATGATTTGAAAAGGTAC +AAGATTAGGAGTAGGACAGATGAACGGTGTGTTAGTAGTGGACGGTAGACAAAAGGTATT +AGGGATTACTAGAAACGAAAAGAAGAACCGTGATGAAAATACAGTGATAATAGAACATAA +TGATGACGGGGAAGTGGAAAGGTCTCCTCGAAACGACCAGGAAAGGTTTCACCTAAAGAC +GACAGGGACATTATTTGGGCTTTTAAAACTTAAAAACATTAAACAAAAACATTAAGAAAT +CAAACATACAGACAACGATAATACAGATGATAAGAAAGGGGACGTGACATGGGGGGTTAG +GGGGGAAAAGAAAATTTTAACACCTACTTATGACGGTAAACATGACGACAGAATTCTACA +AGTCGGACTAGAGAATGGACAGGATATTAAAAGAAATTAAGAAATAAGTATCTAAGATGA +TGAGGAACTGAAACCCCTAACATCCCTTAAGGTTTAAGGACGAACTAAGGGCGGGTGGTT +GTCCGCCGGGATTGGCATCGTGGCCACTTTAACGACGGTAACAGTCATACATAACAAAAA +TGACCGGTAGAAGGACGATTAAAATTTTCTTTTATACGACAAAGGACGGGACAAAGACGA +CCTTATTGAAGACGAAGATATATAGGTGACCGATGTACTTGACGATGGTCCTATTGAAAA +GGAAGATTTACACATGTTAGATCAACGGTATAAGGACCTGATGTCAGATGAACAGGTACG +TACCGAAGAGGAAAATCGACTGTAAATAGTGTCGACCGATGATAAAGAAAACGATGATGT +CCACCGTCCAATTTTAGTGATCGGTAACGAGAGGTTAATGACACTATAAAGAGTACAAGT +AGAACCCGGAATAGATAAGGTAGATTTTTATCATGAAAGGACTAAGGTCGTGACTGATTA +AATAGATGAACAAGTAAAGGAGGTTAAGGAAACACACGACCATGGGTACGGTCTATCTGG +AAAAGGAAAAAATAATTGACGAGATAATAAACTAACTGATTGAGACTAAGTGAAACTAGA +CCAACACGAACTTACTAAGGATTACGTATAACACTCAGACAATGATACAAATGAAGATTA +GGGCTTAGGACGTTTCGATCTATTTAACGAACATTGAGTCAGAAGACTAAACAACACAGT +CAATCCCACTGTTGAAAAACAGAAGGAGATAATCATTGTATAGGACGAAAAGGATTAAAT +CAGAGGGACAATCGACGGGGTAGATGTATCTTCCAAAGACGAGGATGATACCCAAGAAAG +AGATTGACCATGGTATTAAAGTGATTCCCTCCCCATAATTGTTTGAGGGTGAGTCCTTAG +GTCCACCGAACGGTTATGAGACAGGTGGTACAAAGGGTACAAAGGAAAACATACCCGTCA +AATTTAAATCCTCAGAAAGGGGTATAATGATACGAAAGACACCAATAAAAAACGTGACGG +AGACAATTAACAAAATGTAGTAATCACACCCGTGGGGAGTAAGAACGTATAAAAGGACAA +AAGTCTAAAAATTTACCGAGAACTATTTAAACTATACAGGTAACCGGAACGGGGACGAAG +ACATAAAGACGATAATTCAGAAAACTACCCAGTATTATGTGAGGTACATGACCAAGAAAA +TCTTAGAGAGACAAAAGACGGTCAAGATCGAGACGAAGAAGACAATCACCATAATGAAGA +CAATCACGAAACCAAGGAGATTCCTCAAATGTATTAACGGAATGAAATTAGGGACCCATT +TAGACTGAACGGGTTAAGTTAAAGGGGTGATTGAAGACATACAGTAACTGTCAGGTCGAC +AGAAAAAGACCGTCGTGATATCCGACATGACAGGTAAATAGTCCTACCTCAAGTATTGGG +TAGGTTTCCTTACCTCCAAGAAAGACTACAAAAAACAGACCACACCATTCAGGGGTGGAG +TTGTCTACAACAGAGTCGAGGAGATAAAAACAAGATACGACGGGATAAAGATTCAGTCTA +GGATGTATGTTTAGTAGGTACATAACTATCTATTGATACAGACCTAAAACAAAAGATTTT +CCGAGATTCTAAAAACAGTACGATGAAACCTTATAACGACCACTAGGAAAGGTAGGGACA +CCTTCGTGTAACATGACTATAGATTAGGGACCACAGAGTAACAAATATGATCCATACCAT +TTACGTCATATGAAGGACTTCAGAAGTAGATTCCCTTGACTTTTTATACGTAGTGGGTGT +AGGTCATGACAATGACTAAAAAAGAAAAAATTGGGACGCCCTACACCATAAGGATTAACT +TGAAGGGTCTTCAGAACTCAAGAGAATAATTCAAGAGACTTTAGATGATTAAAAGAGGTA +AATCATGACAGAAAAAAGAAATACCGTTTATGACCTCATAACATACCTAAAAGTCCGGGT +TAAAAACTTTAAAAGGGAAGGAAAAGGTAGAGACATGTTTAAAGATGATTACGAAAATAA +AAAAGAAGACAGTTACCGGTAACAAATTGAAAACCCGGTAGGTAAGGACCGAAATTAAAA +TGACCATGTCAGAGTTATCCCGATTACCCTTTTAAATTTCACGTTGGTTAGACTCAGTTG +TCTAAAGAAGGTTAATACAACTGTCCACATCCAGGATGATTATGACATGGATATCGAAAT +ACAGGTGTCTAAAGATACTCATAGACTAGTATGACAGAATGAAACTATTTTGGAGGTTAA +GGGGGATAGTAAAAACCAAAGGTAGAAGGACCGTTTGAGTAAAGAAGATTATGACATAGT +AGACGAGGACATAGATTATCTCGAAGGAAATCAACGGGGGGATAGAAATAACACTGCTCC +CCAGCAACGGTTTCTCACTGGACTCCCTTCAATTTCCTATGTCAAGGAACAGATAGCCGA +GGACGAAGACTCCCCCTCAACAACAGAGATGGGGTCTGGACTTCGAGAGAAGACCACCCC +GACAACCGAGACCAGACGAGACTTCTTTTAAGGGACCGGAAGGGAACATCCTTCCGGTCT +AGAAGGGATTTTTTAATCGGACAGAGAGTCATGTTAGAAAGTAAACCACAGGAAGGAAAG +GTGTAAAGGTTGTCGGGAAAAAGGATCCCCGGGACGTTAAAGACCGACACACGGGAAGAA +ACGGTGTTAACTTTGTGAATTGTTAGAAAGAAACCAAGGATTTTAACGGAGAGACGTAGT +AATACCATCGACTTAAACAATGAACCGAGTAACGAAGTCGGTTTTGAGAACGGAATACCG +GCCCAGGAGGATGAGGGACTGTACGACAGTAGTAAAGAAGATCACATCGGCGACCAGGGT +TACGAAAATTTTATCAGAATGTTAGACCCAAGCGTAAAACCTGGTTGTTCCAAAGACAGT +AGGTTAAAAAATGGAGGACACTTCGAACGAGCCGAGAATCTCAAAATATCTTGGCCAGAT +GTATCAGAGATTTCCCAAGGAAACCAGGAACAGAATACAGGTCTTACGACCATCCCGATA +TGTAAGAATGATAAAATAAATTAGGGTCCTAATAGGTAGAAAATATTTAAAGAGGATGAC +CCTATCCACCTAATAAACAGTAGGTAGGATAAACAAGGACTTCCCATGATCATCAAGGAC +GATACAGTGAAGGGGAACCAAGAGAGTAGACCGGACCACGTTATCCGGGACGTACGTGAC +CTACGTGAGATAGGGTAAGACGTCGAAGGAGTAACTACCAGAGAAAATTGTAAACGTACC +GACGAACTACAGGGGGGTGACACAAATCGTACCACAAATTTAGAACACCCCACCGAGGAA +GACTATTACGACTTTTGTACCCATAGTGAAGACCCGACTTTCGGAAGAGAAGATGATGAA +AATGGGTACGTAAATTTCAAGATCCACTATACCGGACTACATGGTAAACGGGGACCTACA +AGACGTGATATCCCATTAAAACCGACTGGACTAACGACACAGGACACAGTCGACGACGAA +CGACACGAAAAAAGAATGAAAACAAAACGAGAAGGAGATAGAACAGATTTCGAAGGAACC +ACAGAAAATAGAGATAGGAAACTACGTGTGTTATCTCCCAACGATGACATAATATATTAC +TAGATTCAAGAAGACTAGGACAGACTTCCCTACCAACATCGACAGGGTCATAAACAGATG +TCGGAAGACTACAAAGATTGTCCGGTCCTAATTGACGCTTAGCAAGATCGAGGGACGAAC +GGGTATGATATACAAAATTAAATATAAAAAAGAAAGGGGGACCGGAATTGGCTTAAAAAA +GGGTAGCTAGATTAAGAGGGGGCGAATTATGACTGCGAGAGCGTGGGTAGAGAGAGGAAG +ATCGGAGGCGATCAGTTTTAAAAACCGCATGAGTGGTCAGCGGCGGGGAGCGGAGAACGG +CACGCGCGAAGTCGTTCGGCTCAGGACGCAGCTCTCTCGAGGAGACCAAAGGGAAAGCGA +AAGTCCAGGGACAAGCCCGCGGTGACGATCTCTAAAAGGTGTGACTGATTTTCCCAGACT +CCCTAGAGATCAATGGTCTCAGTGTGTTGTCTGCCCGTGTGTGATGAACTTCGTGAGTTC +CGTTCGAAATAACTCCGAATTCGTCACCCAAGGGATCAATCGGTCTCTCGAGGGTCCGAG +TCTAGACCAGATTGGTCTCTCTGGGTCATGTCCGTTTTTCGTCGACGAATATACGTCCTA +GACTCCCGAGCGGTGAGGGGTCAGGGCGGGTCCGGTGCGGAGGGACCTTTCAGGGGTCGC +CTTTCAGGGAACATCGTTCGAGCTACAGTCGTCAAGAACTTCATGAGGCCTACGTCGAGA +GCCCGGTACACTACTTTACGATCCGCCGACAGTTTGGAGGTGAGATTGTGAAGAGAGAGG +CCCAGTAGGTAAGGTACGTCCGAGTGTCCCACATTGTTCGACCACAAGAGAGGAAACAAC +CGAAGAAGATTGAAGAGACCGAGTTGACCATGATCGAACATCGTGGTAGGTTTCCAGTCA +CCTATAGACTAGGGACCGGGACCACACATCAAGACGATTAGTCCCTTCATCGGAACACAC +ACCATCTAGGTGTCTAGTTCCTATAGAACAGAAGCAACCCTCACTTAATCGGGAAGGT >Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT] TGGAAGGGCTAATTCACTCCCAACGAAGACAAGATATCCTTGATCTGTGGATCTACCACA CACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGATCAGATATCCAC diff --git a/tests/expected-results-edgy/orfs.json b/tests/expected-results-edgy/orfs.json index 620d60c..a7d4b99 100644 --- a/tests/expected-results-edgy/orfs.json +++ b/tests/expected-results-edgy/orfs.json @@ -779,7 +779,7 @@ "subtype_nucleotides": "ATGGGGAGTAAGTGGTCAAAAAGTAGCATAGTGGGATGGCCTCAGGTCAGGGAAAAAATAAAGAAAACTCCTCCAGCAGCAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGCAGCAGAAGGAGTAGGAGCAGTATCTCGAGATCTAGATAAACATGGAGCAGTAACAAGTAGTAATATGAATAATGCTGATAATGTCTGGCTGAGAGCACAAGAAGAAGAAGGGGACGACGAGGGGGTAGGCTTTCCAGTCAGGCCGCAGGTACCTCTAAGACCAATGACTTTTAAGGGAGCTTTTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAGAAAAGACAAGAGATCCTTGATTTATGGGTCTACCATACACAAGGCTTCTTCCCTGATTGGCAAAACTACACACCGGGGCCAGGGACCAGATTCCCACTGTGTTTTGGATGGTGCTTCAAGTTAGTACCAGTTGACCCAAGCACAGTAGAGGAAGACAACAAAGGAGAAAACAACTGCCTGTTACACCCCATGAGCCAGCATGGAATAGAGGACGAAGAAAGAGAAGTGCTGATATGGAAGTTTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACTGCATCCAGAGTACTATAAAGACTGCTGA" } ], - "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]": [ + "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT]": [ { "name": "gag", "start": 789, @@ -935,6 +935,162 @@ "subtype_nucleotides": "ATGGGGAGTAAGTGGTCAAAAAGTAGCATAGTGGGATGGCCTCAGGTCAGGGAAAAAATAAAGAAAACTCCTCCAGCAGCAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGCAGCAGAAGGAGTAGGAGCAGTATCTCGAGATCTAGATAAACATGGAGCAGTAACAAGTAGTAATATGAATAATGCTGATAATGTCTGGCTGAGAGCACAAGAAGAAGAAGGGGACGACGAGGGGGTAGGCTTTCCAGTCAGGCCGCAGGTACCTCTAAGACCAATGACTTTTAAGGGAGCTTTTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAGAAAAGACAAGAGATCCTTGATTTATGGGTCTACCATACACAAGGCTTCTTCCCTGATTGGCAAAACTACACACCGGGGCCAGGGACCAGATTCCCACTGTGTTTTGGATGGTGCTTCAAGTTAGTACCAGTTGACCCAAGCACAGTAGAGGAAGACAACAAAGGAGAAAACAACTGCCTGTTACACCCCATGAGCCAGCATGGAATAGAGGACGAAGAAAGAGAAGTGCTGATATGGAAGTTTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACTGCATCCAGAGTACTATAAAGACTGCTGA" } ], + "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": [ + { + "name": "gag", + "start": 48, + "end": 2339, + "orientation": "forward", + "distance": 0.7685703620754012, + "protein": "MSVFRRRIYVLDSRAVRGQGGSGAEGPFRGRLSGNIVRATVVKNFMRPTSRARCTTLRSADSLEVRL", + "aminoacids": "MVSVCCLPVCDELREFRSK*LRIRHPRDQSVSRGSESRPDWSLWVMSVFRRRIYVLDSRAVRGQGGSGAEGPFRGRLSGNIVRATVVKNFMRPTSRARCTTLRSADSLEVRL*RERPSR*GTSECPTLFDHKRGNNRRR*NRPS*P*SNIVVGFQSPIDWGPGPHIKTVSPFIGTHTI*VSSSYRTEETLT*SGRSGGKRKFFTDSRCRRNIQ*PEFPWTPH*PFGWRRRRTRRSVRVRRNHRRHNDEH*RGTKRSRALRRGWDRRRPSRAE*ERECHPVG*CDEKLVNGG*NIVLGKVRDRIRRSIHRLSRCSRNMMKILG*TGESMTIPTP*LVRLS**EIKD*GYDIL*GGYKLPKGGGRRVFKVLGAMLVLIQRVRHHRLLRVRGV*QGLFTVPRQVISLPRQRQRERWKKKPRKPGQPRGAPTLHPDFAITTYRDGLR*VISFMSFLIRIWMVRRMIVILIKIYGVG*TIQFG*GV*TGK*IKVIKNK*EKNDQNAKKLIPHIIRTH*QLKRQGEVGPAH*GLDKVSK**G*S*GSVSSPRLLKRSRWGLGVLDN*EIHRKVSVLRTDLDELRGLTLNVVYDNAELSGVV*QDDDVIWSVINRPDMAVAVTPTRVSRRTTRVLGFLVSRIRVTRKKRDVVRRETEPPTMRITKLKMMKYKYIK*RG*QGV*RRRSRLLEPE*QR*WW*RTIIVGTL*TTCRLNR*LPSPYVTKRMKDGVQILNKIRRTLPLTQ*RKSQ*TGKS*GS**FGS**FVNTT*", + "nucleotides": "ATGGTCTCAGTGTGTTGTCTGCCCGTGTGTGATGAACTTCGTGAGTTCCGTTCGAAATAACTCCGAATTCGTCACCCAAGGGATCAATCGGTCTCTCGAGGGTCCGAGTCTAGACCAGATTGGTCTCTCTGGGTCATGTCCGTTTTTCGTCGACGAATATACGTCCTAGACTCCCGAGCGGTGAGGGGTCAGGGCGGGTCCGGTGCGGAGGGACCTTTCAGGGGTCGCCTTTCAGGGAACATCGTTCGAGCTACAGTCGTCAAGAACTTCATGAGGCCTACGTCGAGAGCCCGGTGCACTACTTTACGATCCGCCGACAGTTTGGAGGTGAGATTGTGAAGAGAGAGGCCCAGTAGGTAGGGTACGTCCGAGTGTCCCACATTGTTCGACCACAAGAGAGGAAATAACCGGAGAAGATAGAATAGACCGAGTTGACCATGATCGAACATCGTGGTAGGTTTCCAGTCACCTATAGACTGGGGACCGGGACCACACATCAAGACGGTTAGTCCCTTCATCGGAACACACACCATCTAGGTGTCTAGTTCCTATAGAACAGAAGAAACCCTCACTTAATCGGGAAGGTCAGGGGGGAAAAGAAAATTTTTCACCGATTCTAGATGTCGACGGAACATTCAGTAACCAGAATTTCCATGGACTCCACACTGACCTTTTGGGTGGAGGAGGAGGAGAACACGAAGATCGGTCCGTGTTCGTCGTAACCATCGACGACATAACGATGAACACTAACGAGGTACAAAAAGGTCCAGAGCTCTACGACGAGGGTGGGATAGACGACGACCGAGTCGAGCAGAGTAAGAAAGGGAATGTCATCCGGTAGGTTAGTGTGATGAAAAACTGGTGAACGGTGGGTAGAATATCGTTTTAGGAAAGGTTCGGGACAGAATAAGAAGATCCATACACCGCTTATCGAGATGTTCGAGGAACATGATGAAGATATTGGGATAGACAGGGGAGTCGATGACGATACCGACACCGTAACTCGTTCGATTGTCGTGATAAGAAATCAAGGACTGAGGTTATGACATCCTCTAAGGTGGTTATAAACTCCCGAAGGGTGGGGGACGCAGGGTCTTCAAGGTGTTAGGAGCAATGTTAGTTCTCATTCAGAGAGTTCGCCACCATCGACTTCTCCGTGTCCGAGGCGTCTAGCAGGGTCTATTCACGGTTCCTAGGCAAGTGATTAGCTTACCTAGACAGAGACAGAGAGAGAGGTGGAAGAAGAAGCCAAGGAAGCCCGGACAGCCCAGGGGAGCCCCAACCCTCCACCCAGACTTTGCTATTACCACTTATAGGGACGGATTGAGATAAGTGATATCTTTCATGTCGTTTTTGATAAGAATTTGGATGGTTCGGAGGATGATAGTAATACTTATTAAAATATATGGTGTCGGTTAAACAATACAATTTGGTTAAGGTGTTTGAACGGGTAAATAGATTAAGGTTATTAAGAACAAGTAAGAAAAGAACGACCAAAACGCTAAGAAGTTAATTCCTCACATAATTCGAACACATTAACAATTAAAGAGACAGGGTGAGGTAGGTCCAGCACACTAAGGTTTAGACAAGGTCTCTAAATAATGAGGTTGATCGTAAGGTTCCGTGTCGTCACCACGTTTACTCAAAAGGTCTCGTTGGGGTTTAGGGGTCCTCGACAACTAGGAAATCCATAGAAAGGTGTCGGTCCTAAGAACGGACCTCGACGAACTACGGGGTCTGACACTCAACGTTGTCTACGACAACGCGGAGTTATCGGGAGTCGTTTAACAAGACGACGACGTGATATGGTCTGTTATTAACAGACCGGACATGGCAGTCGCAGTAACTCCGACGCGGGTATCACGAAGGACGACGAGGGTTCTTGGGTTCCTTGTTTCGAGGATAAGGGTGACGAGAAAAAAGAGAGACGTGGTGAGAAGAGAAACGGAACCACCCACGATGAGGATTACCAAGTTAAAAATGATGAAATATAAATATATTAAGTGAAGAGGTTAACAGGGAGTATAGAGGAGGAGGTCCAGACTTCTAGAGCCTGAGTAACAACGATAATGGTGGTAGAGAACAATTATCGTCGGGACATTATAAACTACTTGTAGATTAAACAGGTGACTACCCTCCCCGTATGTAACGAAAAGGATGAAAGACGGTGTACAAATATTAAACAAAATAAGACGTACCCTCCCACTAACACAGTGAAGGAAGTCACAATAAACTGGGAAGTCATGAGGTTCATGATAATTTGGTTCATGATAATTTGTCAACACAACTTAA", + "subtype_start": 1, + "subtype_end": 1497, + "subtype_aminoacids": "MGARASVLSGGKLDAWEKIRLRPGGKKKYKMKHLVWASRELERFALNPGLLETAEGCQQIIEQLQSTLKTGSEELKSLYNTVVTLWCVHQRIEVKDTKEALDKIEEEQKKSRQKTQQAAAGTGNSSQASQNYPIVQNAQGQMVHQPLSPRTLNAWVKVVEEKGFNPEVISMFSALSEGATPQDLNMMLNIVGGHQAAMQMLKDTINEEAAEWDRTHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIGWMTNTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTETLLVQNANPDCRSILKALGSGATLEEMMTACQGVGGPSHKARVLAEAMSQAQHATIMMQRGNFKGQKRIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSTNKGRPGNFPQSRPEPTAPPADWGMGEEITSLLKQEQKDKEHLPPTVSLKSLFGNDPLSQ*", + "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGTGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAAGATGAAACATTTAGTATGGGCAAGCAGAGAGTTAGAAAGATTCGCACTTAATCCTGGCCTATTAGAAACAGCAGAAGGATGTCAACAAATAATAGAACAGTTACAGTCAACTCTCAAGACAGGATCAGAAGAACTTAAATCCTTATATAATACAGTAGTAACCCTCTGGTGCGTACACCAAAGGATAGAGGTAAAAGACACCAAGGAAGCTTTAGATAAAATAGAGGAAGAACAAAAGAAGAGCCGGCAAAAGACACAGCAGGCAGCAGCTGGCACAGGAAACAGCAGCCAAGCCAGCCAAAATTACCCTATAGTGCAAAATGCACAAGGGCAAATGGTACATCAGCCTTTATCACCTAGAACTTTGAATGCATGGGTGAAGGTAGTAGAAGAAAAGGGTTTTAACCCAGAAGTAATATCCATGTTCTCAGCATTATCAGAGGGAGCCACCCCACAAGATTTAAATATGATGCTAAATATAGTAGGGGGACACCAGGCAGCAATGCAAATGTTAAAAGACACCATCAACGAGGAAGCTGCAGAATGGGATAGGACACATCCAGTACATGCAGGGCCTATTCCACCAGGCCAAATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGGATGGATGACAAACACTCCTCCTATCCCAGTGGGAGAAATCTATAAAAGGTGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTAGCATTTTGGACATAAGACAAGGGCCAAAAGAACCCTTCAGAGATTATGTAGATAGGTTCTTTAAAACTCTCAGAGCGGAACAAGCTACACAGGAGGTAAAAAACTGGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGACTGTAGGTCCATTTTAAAAGCATTAGGGTCAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAAGGAGTGGGAGGACCTAGTCATAAAGCAAGAGTTCTGGCTGAGGCAATGAGCCAAGCACAACATGCAACTATAATGATGCAGAGAGGCAATTTTAAGGGCCAGAAAAGAATTAAGTGCTTCAACTGTGGCAAAGAAGGACACCTAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGTTGTTGGAAATGCGGGAAGGAAGGACATCAAATGAAAGACTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATTTGGCCTTCCACCAACAAGGGAAGGCCGGGAAATTTTCCTCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGACTGGGGGATGGGGGAAGAGATAACCTCATTACTGAAACAGGAACAGAAAGACAAGGAACATCTTCCTCCTACAGTTTCCCTCAAATCACTCTTTGGCAACGACCCATTGTCCCAGTAA" + }, + { + "name": "pol", + "start": 2071, + "end": 5364, + "orientation": "forward", + "distance": 0.7726723799269597, + "protein": "SVSTEIEKTTIITTITVHNISRMEQYRTNYKCGSRLIHRIIPERISSQTFCQFHYVYLINQTIKSQMNTRYREKGGKIPLRDWVYGQRKRTYVWGYTTIMVRS", + "aminoacids": "YRGGGPDF*SLSNNDNGGREQLSSGHYKLLVD*TGDYPPRM*RKG*KTVYKY*TK*DVPSH*HSEGSHNKLGSHEVHDNLVHDNLSTQLNVIFLRGGVNFDTQC*RPRGGLLTNF**QNNKGLTRELNDR*TKFHNKVKRDDYNVTRTEYKG*KG*HCLREGPGETYA*EKEHNNNPEHVN*RCLHKST**YQNRNRHFNCLD**WRRRRSDGKLSSTQL**PD*GTHVT*HDCKHVPGQGNLQNNNVKS*RFGRPRVITYP*PSFLWKPVRTHY*LHNVEQLNIDHHSNR*P*YSSNIFLRIRKTWNGEYEHDYNFLVKK*RGKR***EGDDDNHNHSKKFSHVKFDCVSPQLKCVPKSETRVFD*YRST*TRWYSKKVYNFKRQCKWL**RTPNPRHPCVRTHRVCNTWRHSIRNRRLRVLSHHQRRKVCPWGII*HWVLKRHRDV**L*GSSYHGVEVGVEVFTTIKRK*E*R*QKTRKIIR*LIGQIKRNRIIKGYKILMIPGVLITIIITMMITMITMITISNVMYMMNDETISSNYSD*QDYSRSSDRGEEGRYPLRIPKTILCLNRYFRCEKCYR*PCSSKSD*RTYEGPEIRS*MTEVKNERRQLIADKTIQLWVKTLPICRQQRLKNNTEGEDGFIGVFKVSIQRDSVPRFRILLQIVEVRIRGDFRDHR*VTHRGRHREPEDPEQGR*ETVKAL*SVSTEIEKTTIITTITVHNISRMEQYRTNYKCGSRLIHRIIPERISSQTFCQFHYVYLINQTIKSQMNTRYREKGGKIPLRDWVYGQRKRTYVWGYTTIMVRS*GITLHMKT*IRTYSRKYHYRYFGRGSKGTLYVYHKMI*KGTRLGVGQMNGVLVVDGRQKVLGITRNEKKNRDENTVIIEHNDDGEVERSPRNDQERFHLKTTGTLFGLLKLKNIKQKH*EIKHTDNDNTDDKKGDVTWGVRGEKKILTPTYDGKHDDRILQVGLENGQDIKRN*EISI*DDEELKPLTSLKV*GRTKGGWLSAGIGIVATLTTVTVIHNKNDR*KDD*NFLLYDKGRDKDDLIEDEDI*VTDVLDDGPIEKEDLHMLDQRYKDLMSDEQVRTEEENRL*", + "nucleotides": "TATAGAGGAGGAGGTCCAGACTTCTAGAGCCTGAGTAACAACGATAATGGTGGTAGAGAACAATTATCGTCGGGACATTATAAACTACTTGTAGATTAAACAGGTGACTACCCTCCCCGTATGTAACGAAAAGGATGAAAGACGGTGTACAAATATTAAACAAAATAAGACGTACCCTCCCACTAACACAGTGAAGGAAGTCACAATAAACTGGGAAGTCATGAGGTTCATGATAATTTGGTTCATGATAATTTGTCAACACAACTTAATGTCATCTTTTTAAGGGGAGGTGTTAATTTTGACACGCAATGTTAAAGACCCAGGGGAGGACTCCTAACGAATTTCTAATAACAAAATAATAAAGGTTTAACAAGAGAATTAAACGATCGATAGACAAAATTTCACAATAAGGTAAAACGAGATGATTACAATGTTACACGAACAGAGTATAAAGGATAAAAAGGATAACATTGTTTACGAGAGGGACCAGGAGAGACCTATGCCTAAGAAAAAGAACATAACAACAACCCAGAACATGTTAATTAAAGATGTCTACACAAGTCGACATGATAATACCAAAATCGTAACAGGCACTTTAACTGTCTAGATTAATGATGGAGAAGAAGACGATCTGACGGTAAATTGTCGTCAACTCAACTATGATGACCGGATTAAGGTACACATGTAACATGACACGACTGTAAACATGTACCAGGACAAGGTAACTTGCAGAATAATAATGTAAAATCTTAGCGTTTTGGTCGGCCCCGTGTTATTACATACCCTTAACCGAGTTTCCTATGGAAACCTGTCCGGACACATTACTGACTCCACAATGTTGAACAGTTGAATATCGACCATCATAGTAATAGATAACCATAATATAGTTCAAATATTTTTTTACGTATAAGAAAGACGTGGAATGGAGAATACGAACACGACTATAACTTTCTCGTCAAAAAATAGAGAGGAAAGAGGTAATAGTAAGAGGGCGATGATGATAACCATAATCATAGTAAGAAGTTTAGTCACGTGAAATTTGATTGTGTCTCACCCCAATTAAAATGTGTACCGAAATCCGAAACTAGGGTATTTGACTAATATAGGAGTACGTAGACAAGATGGTACAGTAAAAAGGTGTACAATTTTAAAAGACAGTGTAAATGGTTATGATGAAGAACACCCAACCCCAGACACCCATGTGTCCGTACACACCGGGTTTGTAATACATGGAGACATAGTATACGAAATCGTAGACTACGTGTTTTATCTCACCACCAACGAAGGAAGGTGTGTCCATGGGGTATTATCTGACACTGGGTGTTAAAAAGACATCGTGATGTCTAGTAGTTGTAGGGTTCCTCGTACCACGGGGTAGAGGTGGGGGTAGAGGTGTTCACGACTATAAAGAGGAAGTGAGAGTAACGGTGACAGAAGACGAGAAAGATAATCAGATAGTTAATTGGACAGATAAAAAGAAACAGAATTATAAAAGGATATAAGATACTAATGATACCTGGTGTGTTGATAACGATAATAATAACGATGATGATTACGATGATAACGATGATAACCATATCCAACGTAATGTACATGATGAATGACGAAACTATCTCTTCGAACTACTCAGACTGACAAGACTACTCGAGAAGCAGCGACAGAGGCGAAGAAGGACGGTATCCTCTACGGATTCCGAAAACAATACTTTGTTTGAACCGTTACTTTCGTTGTGAAAAATGTTATCGTTAACCATGTTCGTCAAAATCCGACTGAAGGACCTACGAAGGTCCCGAGATCAGATCCTAGATGACCGAGGTAAAGAACGAGAGGAGACAGCTCATTGCGGATAAGACGATACAGCTGTGGGTTAAGACTTTACCTATTTGTCGTCAACAACGTCTTAAGAATAATACCGAAGGTGAGGACGGGTTCATAGGGGTATTCAAAGTATCTATACAACGGGATTCGGTACCTCGGTTTAGGATCCTTTTACAGATTGTCGAAGTAAGAATTCGAGGAGATTTTCGAGATCACAGGTAAGTAACACACCGAGGGAGACACCGGGAACCAGAAGACCCCGAACAAGGTAGATAGGAGACAGTCAAAGCATTGTGATCCGTTTCCACCGAAATAGAAAAAACCACAATAATTACGACGATCACGGTTCATAACATCTCTAGGATGGAACAATACAGGACGAACTATAAGTGTGGATCCCGATTGATACACAGGATTATTCCGGAAAGAATATCGTCTCAGACTTTTTGTCAGTTTCATTATGTCTACTTAATCAACCAGACGATCAAGTCCCAGATGAACACACGATATAGAGAAAAAGGAGGTAAGATACCTCTGAGGGACTGGGTTTACGGTCAGAGAAAGAGGACATACGTCTGGGGTTATACAACAATAATGGTTAGATCGTAGGGGATCACCCTACACATGAAGACTTGAATAAGAACCTACTCCCGAAAGTATCACTACAGATATTTTGGTAGGGGATCGAAAGGGACTTTGTATGTATACCACAAAATGATTTGAAAAGGTACAAGATTAGGAGTAGGACAGATGAACGGTGTGTTAGTAGTGGACGGTAGACAAAAGGTATTAGGGATTACTAGAAACGAAAAGAAGAACCGTGATGAAAATACAGTGATAATAGAACATAATGATGACGGGGAAGTGGAAAGGTCTCCTCGAAACGACCAGGAAAGGTTTCACCTAAAGACGACAGGGACATTATTTGGGCTTTTAAAACTTAAAAACATTAAACAAAAACATTAAGAAATCAAACATACAGACAACGATAATACAGATGATAAGAAAGGGGACGTGACATGGGGGGTTAGGGGGGAAAAGAAAATTTTAACACCTACTTATGACGGTAAACATGACGACAGAATTCTACAAGTCGGACTAGAGAATGGACAGGATATTAAAAGAAATTAAGAAATAAGTATCTAAGATGATGAGGAACTGAAACCCCTAACATCCCTTAAGGTTTAAGGACGAACTAAGGGCGGGTGGTTGTCCGCCGGGATTGGCATCGTGGCCACTTTAACGACGGTAACAGTCATACATAACAAAAATGACCGGTAGAAGGACGATTAAAATTTTCTTTTATACGACAAAGGACGGGACAAAGACGACCTTATTGAAGACGAAGATATATAGGTGACCGATGTACTTGACGATGGTCCTATTGAAAAGGAAGATTTACACATGTTAGATCAACGGTATAAGGACCTGATGTCAGATGAACAGGTACGTACCGAAGAGGAAAATCGACTGTAA", + "subtype_start": 1290, + "subtype_end": 4301, + "subtype_aminoacids": "FFRENLAFHQQGKAGKFSSEQTRANSPASRLGDGGRDNLITETGTERQGTSSSYSFPQITLWQRPIVPVKIGGQIKEALLDTGADDTVLEDINLPGKWKPKMIGGIGGFIKVRQYDQILIEIWGKKAIGTVLVGPTPVNIIGRNMLTQIGCTLNFPISPINTVPVTLKPGMDGPKVKQWPLTEEKIKALTEICKELEAEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFKIKNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRAHLLSWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIELPEKDSWTVNDIQKLVGKLNWASQIYAGIKVXQLCKLLRGAKALTDIVPLTXEAELELAXNREILKTPVHGVYYDPSKDLVAEVQKQGQDQWTYQIYQEPFKNLKTGKYARKRSAHTNDVKQLTEVVQKIATESIVIWGKTPKFRLPIQKETWETWWMEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAASRETKLGKAGYVTDRGRQKVITLTETTNQKTELHAIHLALQDSGSEVNIVTDSQYALGIIQAQPDRSESEVVNQIIEELIKKEKVYLSWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHERYHSNWKAMASDFNLPPVVAKEIVANCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIIMVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKVIHTDNGSNFTSAAVKAACWWANVRQEFGIPYNPQSQGVVESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDMIATDIQTKELQKQIINIQNFRVYYRDSRDPIWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED*", + "subtype_nucleotides": "TTTTTTAGGGAAAATTTGGCCTTCCACCAACAAGGGAAGGCCGGGAAATTTTCCTCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGACTGGGGGATGGGGGAAGAGATAACCTCATTACTGAAACAGGAACAGAAAGACAAGGAACATCTTCCTCCTACAGTTTCCCTCAAATCACTCTTTGGCAACGACCCATTGTCCCAGTAAAAATAGGAGGACAGATAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGGATTGGAGGTTTTATCAAGGTAAGGCAATATGATCAGATACTTATAGAAATTTGGGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTAACACTGTACCAGTAACATTAAAGCCAGGAATGGATGGGCCCAAGGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAAGAACTGGAGGCGGAAGGAAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAGGACAGCACCAAATGGAGAAAATTAGTAGATTTCAGAGAGCTTAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCGCATCCAGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGAGATGCATATTTTTCAGTTCCTTTAGACGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTACAAACAATGAGACACCAGGAATCAGATATCAGTACAATGTACTGCCACAGGGATGGAAAGGATCACCTGCAATATTCCAGAGTAGCATGACAAAAATCTTAGAGCCCTTTAAAATAAAAAATCCAGAGATAGTCATCTATCAATACATGGATGACTTGTATGTRGGATCTGATTTAGAAATAGGGCAGCACAGAACAAAAATAGAGGAACTGAGAGCTCATCTGTTGAGCTGGGGATTTACTACACCAGACAAAAAGCATCAGAAGGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAGATGGACAGTCCAGCCTATAGAATTACCAGAAAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATGCAGGGATTAAGGTAARRCAACTATGTAAACTCCTCAGGGGAGCTAAAGCACTAACAGACATAGTACCACTGACTRCAGAAGCAGAATTAGAGTTGGCAGRGAACAGGGAGATTCTAAAGACCCCTGTGCATGGAGTATATTATGACCCATCAAAAGACTTAGTAGCAGAAGTACAGAAACAAGGGCAGGACCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTAAAAACAGGAAAATATGCCAGAAAAAGATCTGCTCACACTAATGATGTAAAACAATTAACAGAAGTAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACCCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCGGAATGGGAGTTTGTTAATACCCCGCCTCTGGTAAAATTATGGTACCAATTAGAAAAAGACCCCATAGTAGGAGCAGARACTTTCTATGTAGATGGGGCAGCTAGTAGGGAGACTAAACTAGGAAAAGCAGGATATGTCACTGAYAGAGGAAGACAAAAGGTAATTACCCTCACTGAGACAACAAATCAAAAGACTGAATTACATGCAATCCATTTAGCCTTGCAGGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGACAGGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAAAAAGAAAAGGTCTACCTGTCATGGGTACCAGCACACAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCAGGAATCAGGAAGGTACTATTTTTAGATGGGATAGATAAAGCTCAGGAAGAACATGAAAGATATCATAGCAATTGGAAAGCAATGGCTAGTGATTTTAATTTGCCACCTGTAGTAGCAAAGGAAATAGTAGCCAACTGTGATAAATGTCAACTAAAAGGGGAAGCTATGCATGGACAAGTAGAYTGTAGTCCAGGGATATGGCAATTAGATTGCACACATCTAGAAGGAAAAATCATCATGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTGAAATTAGCAGGAAGATGGCCAGTAAAAGTAATACACACAGACAACGGAAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCCAATGTCCGACAGGAATTTGGGATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAGGAGTTAAAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATGATAGCAACAGACATACAAACTAAAGAATTACAGAAACAAATTATAAACATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGACAATAGTGAAATAAAAGTAGTACCAAGAAGAAAAGCAAAGATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAG" + }, + { + "name": "vif", + "start": 5293, + "end": 5817, + "orientation": "forward", + "distance": 0.7726682453594071, + "protein": "MLDQRYKDLMSDEQVRTEEENRL", + "aminoacids": "MLDQRYKDLMSDEQVRTEEENRL*IVSTDDKENDDVHRPILVIGNERLMTL*RVQVEPGIDKVDFYHERTKVVTD*IDEQVKEVKETHDHGYGLSGKGKNN*RDNKLTD*D*VKLDQHELTKDYV*HSDNDTNED*GLGRFDLFNEH*VRRLNNTVNPTVEKQKEIIIV*DEKD*", + "nucleotides": "ATGTTAGATCAACGGTATAAGGACCTGATGTCAGATGAACAGGTACGTACCGAAGAGGAAAATCGACTGTAAATAGTGTCGACCGATGATAAAGAAAACGATGATGTCCACCGTCCAATTTTAGTGATCGGTAACGAGAGGTTAATGACACTATAAAGAGTACAAGTAGAACCCGGAATAGATAAGGTAGATTTTTATCATGAAAGGACTAAGGTCGTGACTGATTAAATAGATGAACAAGTAAAGGAGGTTAAGGAAACACACGACCATGGGTACGGTCTATCTGGAAAAGGAAAAAATAATTGACGAGATAATAAACTAACTGATTGAGACTAAGTGAAACTAGACCAACACGAACTTACTAAGGATTACGTATAACACTCAGACAATGATACAAATGAAGATTAGGGCTTAGGACGTTTCGATCTATTTAACGAACATTGAGTCAGAAGACTAAACAACACAGTCAATCCCACTGTTGAAAAACAGAAGGAGATAATCATTGTATAGGACGAAAAGGATTAA", + "subtype_start": 4246, + "subtype_end": 4821, + "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWNSLVKHHKYISKKAKKWLYRHHYESQNPKVSSEVQIPLGEGRLIIRTYWGLQTGEKDWQLGHGVSIEWRLRKYNTQIDPDLADQLIHLHYFDCFSDSAIRKAILGQVVRHRCDYPSGHNKVGSLQYLALKALIAPKKTKPPLPSVKKLTEDRWNKPQKGGHGENPTMNGH*", + "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGAACATGGAACAGTCTAGTAAAACATCACAAGTATATCTCAAAGAAAGCTAAAAAGTGGCTTTATAGACATCATTATGAAAGCCAGAATCCAAAGGTAAGTTCAGAAGTACAGATCCCACTAGGAGAGGGTAGATTAATAATAAGAACATATTGGGGCCTGCAGACAGGAGAAAAGGACTGGCAATTGGGCCATGGAGTTTCTATAGAATGGAGGCTGAGAAAATATAACACACAAATAGATCCTGACCTAGCAGACCAACTGATTCATCTACACTATTTTGACTGTTTTTCAGACTCTGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGACATAGGTGTGACTATCCATCAGGACATAACAAGGTAGGATCCCTACAATATTTGGCACTAAAAGCATTAATAGCACCAAAAAAGACAAAACCACCTCTGCCTAGTGTGAAGAAATTAACAGAAGACAGATGGAACAAGCCCCAGAAAGGGGGCCACGGAGAGAACCCAACAATGAATGGACATTAG" + }, + { + "name": "vpr", + "start": 5690, + "end": 5917, + "orientation": "forward", + "distance": 0.7699443413729128, + "protein": "MYLPKTRMIPKKEIDHGIKVIPSP", + "aminoacids": "MKIRA*DVSIYLTNIESED*TTQSIPLLKNRRR*SLYRTKRIKSEGQSTG*MYLPKTRMIPKKEIDHGIKVIPSP*", + "nucleotides": "ATGAAGATTAGGGCTTAGGACGTTTCGATCTATTTAACGAACATTGAGTCAGAAGACTAAACAACACAGTCAATCCCACTGTTGAAAAACAGAAGGAGATAATCATTGTATAGGACGAAAAGGATTAAATCAGAGGGACAATCGACGGGGTAGATGTATCTTCCAAAGACGAGGATGATACCCAAGAAAGAGATTGACCATGGTATTAAAGTGATTCCCTCCCCATAA", + "subtype_start": 4764, + "subtype_end": 5051, + "subtype_aminoacids": "MEQAPERGPRREPNNEWTLELLEELKIEAVRHFPRPWLHGLGQYIYNTYGDTWEGVEAIIRMLQQLLFVHFRIGCQHSRIGIVPGRRGRNGAGRS*", + "subtype_nucleotides": "ATGGAACAAGCCCCAGAAAGGGGGCCACGGAGAGAACCCAACAATGAATGGACATTAGAACTGTTAGAGGAGCTTAAAATTGAAGCTGTTAGACATTTTCCTAGGCCCTGGCTTCATGGCTTAGGACAGTACATCTATAACACTTATGGGGACACTTGGGAAGGGGTTGAAGCTATAATAAGAATGTTGCAACAACTACTGTTTGTTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTGTGCCAGGGAGAAGAGGCAGGAATGGAGCTGGTAGATCCTAA" + }, + { + "name": "tat_exon1", + "start": 5864, + "end": 6067, + "orientation": "forward", + "distance": 0.7688723205964585, + "protein": "MIPKKEIDHGIKVIPSP", + "aminoacids": "MIPKKEIDHGIKVIPSP*LFEGESLGPPNGYETGGTKGTKENIPVKFKSSERGIMIRKTPIKNVTETI", + "nucleotides": "ATGATACCCAAGAAAGAGATTGACCATGGTATTAAAGTGATTCCCTCCCCATAATTGTTTGAGGGTGAGTCCTTAGGTCCACCGAACGGTTATGAGACAGGTGGTACAAAGGGTACAAAGGAAAACATACCCGTCAAATTTAAATCCTCAGAAAGGGGTATAATGATACGAAAGACACCAATAAAAAACGTGACGGAGACAATT", + "subtype_start": 5032, + "subtype_end": 5247, + "subtype_aminoacids": "MELVDPNLEPWNHPGSQPTTNCSNCYCKKCCWHCQLCFLKKGLGISYGRKKRKHRRGTPQSSKDHQNPIPKQ", + "subtype_nucleotides": "ATGGAGCTGGTAGATCCTAACCTAGAGCCCTGGAATCATCCGGGAAGTCAGCCTACAACTAATTGTAGCAATTGTTACTGTAAAAAATGTTGCTGGCATTGCCAACTATGCTTTCTGAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAAGCACCGACGAGGAACTCCTCAGAGCAGTAAGGATCATCAAAATCCTATACCAAAGCAG" + }, + { + "name": "rev_exon1", + "start": 5864, + "end": 6070, + "orientation": "forward", + "distance": 0.7648970747562297, + "protein": "MIPKKEIDHGIKVIPSP", + "aminoacids": "MIPKKEIDHGIKVIPSP*LFEGESLGPPNGYETGGTKGTKENIPVKFKSSERGIMIRKTPIKNVTETIN", + "nucleotides": "ATGATACCCAAGAAAGAGATTGACCATGGTATTAAAGTGATTCCCTCCCCATAATTGTTTGAGGGTGAGTCCTTAGGTCCACCGAACGGTTATGAGACAGGTGGTACAAAGGGTACAAAGGAAAACATACCCGTCAAATTTAAATCCTCAGAAAGGGGTATAATGATACGAAAGACACCAATAAAAAACGTGACGGAGACAATTAAC", + "subtype_start": 5171, + "subtype_end": 5248, + "subtype_aminoacids": "MAGRSGSTDEELLRAVRIIKILYQSS", + "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAAGCACCGACGAGGAACTCCTCAGAGCAGTAAGGATCATCAAAATCCTATACCAAAGCAGT" + }, + { + "name": "vpu", + "start": 6074, + "end": 6406, + "orientation": "forward", + "distance": 0.7604365838113448, + "protein": "MTKKILERQKTVKIETKKTITIMKTITKPRRFLKCINGMKLGTHLD", + "aminoacids": "M**SHPWGVRTYKRTKV*KFTENYLNYTGNRNGDEDIKTIIQKTTQYYVRYMTKKILERQKTVKIETKKTITIMKTITKPRRFLKCINGMKLGTHLD*TG*VKGVIEDIQ*", + "nucleotides": "ATGTAGTAATCACACCCGTGGGGAGTAAGAACGTATAAAAGGACAAAAGTCTAAAAATTTACCGAGAACTATTTAAACTATACAGGTAACCGGAACGGGGACGAAGACATAAAGACGATAATTCAGAAAACTACCCAGTATTATGTGAGGTACATGACCAAGAAAATCTTAGAGAGACAAAAGACGGTCAAGATCGAGACGAAGAAGACAATCACCATAATGAAGACAATCACGAAACCAAGGAGATTCCTCAAATGTATTAACGGAATGAAATTAGGGACCCATTTAGACTGAACGGGTTAAGTTAAAGGGGTGATTGAAGACATACAGTAA", + "subtype_start": 5267, + "subtype_end": 5512, + "subtype_aminoacids": "MTPLEISAIVGLIVALISAIVVWTIVAIEAKKLLRQRKIDRLVKRIRERAEDSGNESEGDTEEWAKLVEMGDFDPWVGDNL*", + "subtype_nucleotides": "ATGACACCTTTGGAAATTAGTGCAATAGTAGGATTGATAGTAGCGCTAATCTCAGCAATAGTAGTGTGGACTATAGTAGCTATAGAAGCTAAGAAATTACTAAGGCAAAGAAAAATAGACAGGTTAGTTAAGAGAATAAGAGAAAGAGCAGAAGACAGTGGAAATGAGAGTGAAGGGGACACAGAGGAATGGGCCAAACTTGTGGAAATGGGGGACTTTGATCCTTGGGTTGGTGATAATTTGTAG" + }, + { + "name": "env", + "start": 6216, + "end": 8717, + "orientation": "forward", + "distance": 0.7741801753020409, + "protein": "MLDPSVKPGCSKDSRLKNGGHFERAENLKISWPDVSEISQGNQEQNTGLTTIPICKNDKIN", + "aminoacids": "M*GT*PRKS*RDKRRSRSRRRRQSP**RQSRNQGDSSNVLTE*N*GPI*TERVKLKG*LKTYSNCQVDRKRPS*YPT*QVNSPTSSIG*VSLPPRKTTKNRPHHSGVELSTTESRR*KQDTTG*RFSLGCMFSRYITIY*YRPKTKDFPRF*KQYDETL*RPLGKVGTPSCNMTID*GPQSNKYDPYHLRHMKDFRSRFP*LFIRSGCRS*Q*LKKKKLGRPTP*GLT*RVFRTQENNSRDFR*LKEVNHDRKKKYRL*PHNIPKSPG*KL*KGRKR*RHV*R*LRK*KRRQLPVTN*KPGR*GPKLK*PCQSYPDYPFKFHVG*TQLSKEG*YNCPHPG*L*HGYRNTGV*RYS*TSMTE*NYFGG*GG**KPKVEGPFE*RRL*HSRRGHRLSRRKSTGG*K*HCSPATVSHWTPFNFLCQGTDSRGRRLPLNNRDGVWTSREDHPDNRDQTRLLLRDRKGTSFRSRRDFLIGQRVMLESKPQEGKV*RLSGKRIPGTLKTDTREETVLTL*IVRKKPRILTERRSNTIDLNNEPSNEVGFENGIPAQEDEGLYDSSKEDHIGDQGYENFIRMLDPSVKPGCSKDSRLKNGGHFERAENLKISWPDVSEISQGNQEQNTGLTTIPICKNDKIN*GPNR*KIFKEDDPIHLINSR*DKQGLPMIIKDDTVKGNQESRPDHVIRDVRDLREIG*DVEGVTTRENCKRTDELQGGDTNRTTNLEHPTEEDYYDFCTHSEDPTFGREDDENGYVNFKIHYTGLHGKRGPTRRDIPLKPTGLTTQDTVDDERHEKRMKTKREGDRTDFEGTTENRDRKLRVLSPNDDIIYY*IQED*", + "nucleotides": "ATGTGAGGTACATGACCAAGAAAATCTTAGAGAGACAAAAGACGGTCAAGATCGAGACGAAGAAGACAATCACCATAATGAAGACAATCACGAAACCAAGGAGATTCCTCAAATGTATTAACGGAATGAAATTAGGGACCCATTTAGACTGAACGGGTTAAGTTAAAGGGGTGATTGAAGACATACAGTAACTGTCAGGTCGACAGAAAAAGACCGTCGTGATATCCGACATGACAGGTAAATAGTCCTACCTCAAGTATTGGGTAGGTTTCCTTACCTCCAAGAAAGACTACAAAAAACAGACCACACCATTCAGGGGTGGAGTTGTCTACAACAGAGTCGAGGAGATAAAAACAAGATACGACGGGATAAAGATTCAGTCTAGGATGTATGTTTAGTAGGTACATAACTATCTATTGATACAGACCTAAAACAAAAGATTTTCCGAGATTCTAAAAACAGTACGATGAAACCTTATAACGACCACTAGGAAAGGTAGGGACACCTTCGTGTAACATGACTATAGATTAGGGACCACAGAGTAACAAATATGATCCATACCATTTACGTCATATGAAGGACTTCAGAAGTAGATTCCCTTGACTTTTTATACGTAGTGGGTGTAGGTCATGACAATGACTAAAAAAGAAAAAATTGGGACGCCCTACACCATAAGGATTAACTTGAAGGGTCTTCAGAACTCAAGAGAATAATTCAAGAGACTTTAGATGATTAAAAGAGGTAAATCATGACAGAAAAAAGAAATACCGTTTATGACCTCATAACATACCTAAAAGTCCGGGTTAAAAACTTTAAAAGGGAAGGAAAAGGTAGAGACATGTTTAAAGATGATTACGAAAATAAAAAAGAAGACAGTTACCGGTAACAAATTGAAAACCCGGTAGGTAAGGACCGAAATTAAAATGACCATGTCAGAGTTATCCCGATTACCCTTTTAAATTTCACGTTGGTTAGACTCAGTTGTCTAAAGAAGGTTAATACAACTGTCCACATCCAGGATGATTATGACATGGATATCGAAATACAGGTGTCTAAAGATACTCATAGACTAGTATGACAGAATGAAACTATTTTGGAGGTTAAGGGGGATAGTAAAAACCAAAGGTAGAAGGACCGTTTGAGTAAAGAAGATTATGACATAGTAGACGAGGACATAGATTATCTCGAAGGAAATCAACGGGGGGATAGAAATAACACTGCTCCCCAGCAACGGTTTCTCACTGGACTCCCTTCAATTTCCTATGTCAAGGAACAGATAGCCGAGGACGAAGACTCCCCCTCAACAACAGAGATGGGGTCTGGACTTCGAGAGAAGACCACCCCGACAACCGAGACCAGACGAGACTTCTTTTAAGGGACCGGAAGGGAACATCCTTCCGGTCTAGAAGGGATTTTTTAATCGGACAGAGAGTCATGTTAGAAAGTAAACCACAGGAAGGAAAGGTGTAAAGGTTGTCGGGAAAAAGGATCCCCGGGACGTTAAAGACCGACACACGGGAAGAAACGGTGTTAACTTTGTGAATTGTTAGAAAGAAACCAAGGATTTTAACGGAGAGACGTAGTAATACCATCGACTTAAACAATGAACCGAGTAACGAAGTCGGTTTTGAGAACGGAATACCGGCCCAGGAGGATGAGGGACTGTACGACAGTAGTAAAGAAGATCACATCGGCGACCAGGGTTACGAAAATTTTATCAGAATGTTAGACCCAAGCGTAAAACCTGGTTGTTCCAAAGACAGTAGGTTAAAAAATGGAGGACACTTCGAACGAGCCGAGAATCTCAAAATATCTTGGCCAGATGTATCAGAGATTTCCCAAGGAAACCAGGAACAGAATACAGGTCTTACGACCATCCCGATATGTAAGAATGATAAAATAAATTAGGGTCCTAATAGGTAGAAAATATTTAAAGAGGATGACCCTATCCACCTAATAAACAGTAGGTAGGATAAACAAGGACTTCCCATGATCATCAAGGACGATACAGTGAAGGGGAACCAAGAGAGTAGACCGGACCACGTTATCCGGGACGTACGTGACCTACGTGAGATAGGGTAAGACGTCGAAGGAGTAACTACCAGAGAAAATTGTAAACGTACCGACGAACTACAGGGGGGTGACACAAATCGTACCACAAATTTAGAACACCCCACCGAGGAAGACTATTACGACTTTTGTACCCATAGTGAAGACCCGACTTTCGGAAGAGAAGATGATGAAAATGGGTACGTAAATTTCAAGATCCACTATACCGGACTACATGGTAAACGGGGACCTACAAGACGTGATATCCCATTAAAACCGACTGGACTAACGACACAGGACACAGTCGACGACGAACGACACGAAAAAAGAATGAAAACAAAACGAGAAGGAGATAGAACAGATTTCGAAGGAACCACAGAAAATAGAGATAGGAAACTACGTGTGTTATCTCCCAACGATGACATAATATATTACTAGATTCAAGAAGACTAG", + "subtype_start": 5430, + "subtype_end": 8006, + "subtype_aminoacids": "MRVKGTQRNGPNLWKWGTLILGLVIICSASDNLWVTVYYGVPVWRDADTTLFCASDAKAHETEVHNVWATHACVPTDPSPQEIYLENVTENFNMWKNNMVEQMQEDVISLWDQSLKPCVKLTPLCVTLNCSNANLTNINNTITDKIGNLTIGNITDDIKNCSFNMTTELRDKKKKAYALFYKLDIVSIEKNTSEYRLINCNSSVIKQACPKISFDPIPIHYCTPAGYAILKCNDKKFNGTGPCKNVSSVQCTHGIKPVVSTQLLLNGSLAEEEIIISSKNLTNNANTIIVHLNKSVEINCTRPSNNTRTSVRIGPGQVFYGTGDIIGDPRKAYCQINGTNWNKALKQVTGKLKEHFQNKTINFQPHSGGDPEITTHHFNCRGEFFYCNTTRLFNNTCIKNATVRGCNDTIILPCRIKQIINMWQEAGQAMYAPPISGIINCVSNITGILLTRDGGNSSTNETFRPEGGNIKDNWRSELYKYKVVQIEPLGIAPTRAKRRVVERQKRAVGIGAMIFGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQKFLGLWGCSGKIICTTAVPWNSTWSNKSYEDIWNNLTWTEWEREISNYTNQIYALLTESQDQQDRNEMDLLKLDQWASLWNWFDITNWLWYIKIFIMIVGGLIGLRIIFAVLSIVNRVRQGYSPLSFQTPSHHQREPDRPEGIEEGGGEQGRDRSVRLVSGFLALAWDDLRSLCLFSYHRLRDFILIATRTVELLGHSSLKGLRRGWEGLKYLGNLLLYWGQELRISAISLLDAIAIAVAGWTDRVIEIVQGAWRAFIHIPRRIRQGLERILL*", + "subtype_nucleotides": "ATGAGAGTGAAGGGGACACAGAGGAATGGGCCAAACTTGTGGAAATGGGGGACTTTGATCCTTGGGTTGGTGATAATTTGTAGTGCCTCAGACAACTTGTGGGTTACAGTCTATTATGGGGTTCCTGTGTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCCAAAGCACATGAGACAGAAGTGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAGCCCACAAGAAATATACCTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAGCAGATGCAGGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACTCCTCTCTGTGTTACTTTAAATTGTAGCAATGCTAACTTAACCAACATTAACAACACGATCACTGACAAAATAGGAAATCTAACAATAGGAAATATAACAGATGACATAAAGAACTGTTCTTTTAACATGACCACAGAACTAAGAGATAAGAAGAAGAAGGCTTATGCACTTTTTTATAAGCTTGATATAGTATCAATTGAAAAGAATACAAGTGAGTATAGGTTAATAAATTGTAATTCTTCAGTCATTAAGCAGGCTTGTCCAAAGATATCATTTGATCCAATTCCCATACATTATTGTACTCCAGCTGGTTATGCGATTTTAAAGTGTAATGATAAGAAGTTCAATGGGACAGGACCATGTAAAAATGTCAGCTCAGTACAATGCACACATGGAATTAAGCCAGTGGTGTCAACTCAATTACTGTTAAATGGCAGTCTAGCAGAAGAAGAAATAATAATCAGCTCTAAAAATCTCACAAACAATGCCAACACCATAATAGTGCACCTTAATAAATCTGTAGAAATCAATTGTACCAGACCCTCCAACAATACAAGAACAAGTGTACGTATAGGACCAGGACAAGTGTTTTATGGAACAGGAGACATAATAGGAGATCCAAGAAAAGCATATTGTCAGATTAATGGAACAAATTGGAATAAAGCTTTGAAACAGGTAACTGGAAAATTAAAAGAGCACTTTCAGAATAAGACAATAAACTTTCAACCACACTCAGGAGGAGATCCAGAAATTACAACACATCATTTTAATTGTAGAGGGGAATTTTTCTATTGCAATACAACACGACTGTTTAACAATACTTGCATAAAAAATGCAACCGTGAGGGGATGTAATGACACTATCATACTTCCATGCAGGATAAAGCAAATTATAAACATGTGGCAGGAAGCAGGACAAGCAATGTATGCTCCTCCCATCAGTGGAATAATAAATTGTGTATCAAATATTACAGGAATACTATTGACAAGAGATGGTGGGAATAGTAGTACTAACGAGACCTTCAGACCTGAAGGAGGAAATATAAAAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCACTAGGAATAGCACCCACCAGGGCAAAGAGAAGAGTGGTGGAGCGACAAAAAAGAGCAGTGGGAATAGGAGCTATGATCTTTGGGTTCTTAGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTATCTGGTATAGTGCAACAGCAAAGCAATTTGCTGAGGGCTATAGAGGCGCAGCAGCATATGTTGCAACTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAAAAGTTCCTAGGACTTTGGGGCTGCTCTGGAAAAATCATTTGCACCACTGCTGTGCCCTGGAACTCCACTTGGAGTAATAAATCTTATGAAGATATTTGGAACAACCTGACATGGACAGAATGGGAGAGAGAAATTAGTAATTACACAAACCAAATATATGCGCTACTTACAGAATCACAGGACCAGCAGGACAGAAATGAAATGGATTTGTTAAAATTGGATCAATGGGCAAGTCTGTGGAATTGGTTTGACATAACAAATTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTCTAAGAATAATTTTTGCTGTGCTTTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCTTTGTCTTTCCAGACCCCTTCCCATCATCAGAGGGAACCCGACAGGCCAGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGTTACCACCGCTTGAGAGACTTCATCTTGATTGCAACGAGGACTGTGGAACTTCTGGGACACAGCAGTCTCAAGGGACTGAGACGGGGGTGGGAAGGTCTCAAATATCTGGGGAACCTTCTGTTATATTGGGGCCAGGAACTAAGAATTAGTGCTATTTCTTTGCTTGATGCTATAGCAATAGCAGTAGCGGGGTGGACAGATAGGGTTATAGAAATAGTACAAGGAGCTTGGAGAGCTTTTATCCACATACCTAGAAGAATTAGACAAGGCTTAGAAAGGATTTTGCTATAA" + }, + { + "name": "tat_exon2", + "start": 8199, + "end": 8297, + "orientation": "forward", + "distance": 0.7588506112081282, + "protein": "LPMIIKDDTVKGNQESRPDHVIRDVRDLREIG", + "aminoacids": "LPMIIKDDTVKGNQESRPDHVIRDVRDLREIG*", + "nucleotides": "CTTCCCATGATCATCAAGGACGATACAGTGAAGGGGAACCAAGAGAGTAGACCGGACCACGTTATCCGGGACGTACGTGACCTACGTGAGATAGGGTAA", + "subtype_start": 7567, + "subtype_end": 7662, + "subtype_aminoacids": "RPLPIIRGNPTGQKESKKEVASKAETDPCD**", + "subtype_nucleotides": "AGACCCCTTCCCATCATCAGAGGGAACCCGACAGGCCAGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGCGATTAGTGA" + }, + { + "name": "rev_exon2", + "start": 8199, + "end": 8702, + "orientation": "forward", + "distance": 0.7643157554189582, + "protein": "DVEGVTTRENCKRTDELQGGDTNRTTNLEHPTEEDYYDFCTHSEDPTFGREDDENGYVNFKIHYTGLHGKRGPTRRDIPLKPTGLTTQDTVDDERHEKRMKTKREGDRTDFEGTTENRDRKLRVLSPNDDIIYY", + "aminoacids": "LPMIIKDDTVKGNQESRPDHVIRDVRDLREIG*DVEGVTTRENCKRTDELQGGDTNRTTNLEHPTEEDYYDFCTHSEDPTFGREDDENGYVNFKIHYTGLHGKRGPTRRDIPLKPTGLTTQDTVDDERHEKRMKTKREGDRTDFEGTTENRDRKLRVLSPNDDIIYY*", + "nucleotides": "CTTCCCATGATCATCAAGGACGATACAGTGAAGGGGAACCAAGAGAGTAGACCGGACCACGTTATCCGGGACGTACGTGACCTACGTGAGATAGGGTAAGACGTCGAAGGAGTAACTACCAGAGAAAATTGTAAACGTACCGACGAACTACAGGGGGGTGACACAAATCGTACCACAAATTTAGAACACCCCACCGAGGAAGACTATTACGACTTTTGTACCCATAGTGAAGACCCGACTTTCGGAAGAGAAGATGATGAAAATGGGTACGTAAATTTCAAGATCCACTATACCGGACTACATGGTAAACGGGGACCTACAAGACGTGATATCCCATTAAAACCGACTGGACTAACGACACAGGACACAGTCGACGACGAACGACACGAAAAAAGAATGAAAACAAAACGAGAAGGAGATAGAACAGATTTCGAAGGAACCACAGAAAATAGAGATAGGAAACTACGTGTGTTATCTCCCAACGATGACATAATATATTACTAG", + "subtype_start": 7568, + "subtype_end": 7864, + "subtype_aminoacids": "DPFPSSEGTRQARRNRRRRWRARQRQIRAISERILSTCLGRSTEPVPLQLPPLERLHLDCNEDCGTSGTQQSQGTETGVGRSQISGEPSVILGPGTKN*", + "subtype_nucleotides": "GACCCCTTCCCATCATCAGAGGGAACCCGACAGGCCAGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGTTACCACCGCTTGAGAGACTTCATCTTGATTGCAACGAGGACTGTGGAACTTCTGGGACACAGCAGTCTCAAGGGACTGAGACGGGGGTGGGAAGGTCTCAAATATCTGGGGAACCTTCTGTTATATTGGGGCCAGGAACTAAGAATTAG" + }, + { + "name": "nef", + "start": 8757, + "end": 9611, + "orientation": "forward", + "distance": 0.7666383193171439, + "protein": "MSVFRRRIYVLDSRAVRGQGGSGAEGPFRGRLSGNIVRATVVKNFMRPTSRARYTTLRSADSLEVRL", + "aminoacids": "MSEDYKDCPVLIDA*QDRGTNGYDIQN*I*KRKGDRNWLKKG*LD*EGANYDCESVGRERKIGGDQF*KPHEWSAAGSGERHARSRSAQDAALSRRPKGKRKSRDKPAVTISKRCD*FSQTP*RSMVSVCCLPVCDELREFRSK*LRIRHPRDQSVSRGSESRPDWSLWVMSVFRRRIYVLDSRAVRGQGGSGAEGPFRGRLSGNIVRATVVKNFMRPTSRARYTTLRSADSLEVRL*RERPSR*GTSECPTLFDHKRGNNRRRLKRPS*P*SNIVVGFQSPID*", + "nucleotides": "ATGTCGGAAGACTACAAAGATTGTCCGGTCCTAATTGACGCTTAGCAAGATCGAGGGACGAACGGGTATGATATACAAAATTAAATATAAAAAAGAAAGGGGGACCGGAATTGGCTTAAAAAAGGGTAGCTAGATTAAGAGGGGGCGAATTATGACTGCGAGAGCGTGGGTAGAGAGAGGAAGATCGGAGGCGATCAGTTTTAAAAACCGCATGAGTGGTCAGCGGCGGGGAGCGGAGAACGGCACGCGCGAAGTCGTTCGGCTCAGGACGCAGCTCTCTCGAGGAGACCAAAGGGAAAGCGAAAGTCCAGGGACAAGCCCGCGGTGACGATCTCTAAAAGGTGTGACTGATTTTCCCAGACTCCCTAGAGATCAATGGTCTCAGTGTGTTGTCTGCCCGTGTGTGATGAACTTCGTGAGTTCCGTTCGAAATAACTCCGAATTCGTCACCCAAGGGATCAATCGGTCTCTCGAGGGTCCGAGTCTAGACCAGATTGGTCTCTCTGGGTCATGTCCGTTTTTCGTCGACGAATATACGTCCTAGACTCCCGAGCGGTGAGGGGTCAGGGCGGGTCCGGTGCGGAGGGACCTTTCAGGGGTCGCCTTTCAGGGAACATCGTTCGAGCTACAGTCGTCAAGAACTTCATGAGGCCTACGTCGAGAGCCCGGTACACTACTTTACGATCCGCCGACAGTTTGGAGGTGAGATTGTGAAGAGAGAGGCCCAGTAGGTAAGGTACGTCCGAGTGTCCCACATTGTTCGACCACAAGAGAGGAAACAACCGAAGAAGATTGAAGAGACCGAGTTGACCATGATCGAACATCGTGGTAGGTTTCCAGTCACCTATAGACTAG", + "subtype_start": 8008, + "subtype_end": 8682, + "subtype_aminoacids": "MGSKWSKSSIVGWPQVREKIKKTPPAAEGVGAVSQDLDKHGAAEGVGAVSRDLDKHGAVTSSNMNNADNVWLRAQEEEGDDEGVGFPVRPQVPLRPMTFKGAFDLSFFLKEKGGLDGLIYSKKRQEILDLWVYHTQGFFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPSTVEEDNKGENNCLLHPMSQHGIEDEEREVLIWKFDSALARKHIARELHPEYYKDC*", + "subtype_nucleotides": "ATGGGGAGTAAGTGGTCAAAAAGTAGCATAGTGGGATGGCCTCAGGTCAGGGAAAAAATAAAGAAAACTCCTCCAGCAGCAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGCAGCAGAAGGAGTAGGAGCAGTATCTCGAGATCTAGATAAACATGGAGCAGTAACAAGTAGTAATATGAATAATGCTGATAATGTCTGGCTGAGAGCACAAGAAGAAGAAGGGGACGACGAGGGGGTAGGCTTTCCAGTCAGGCCGCAGGTACCTCTAAGACCAATGACTTTTAAGGGAGCTTTTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAGAAAAGACAAGAGATCCTTGATTTATGGGTCTACCATACACAAGGCTTCTTCCCTGATTGGCAAAACTACACACCGGGGCCAGGGACCAGATTCCCACTGTGTTTTGGATGGTGCTTCAAGTTAGTACCAGTTGACCCAAGCACAGTAGAGGAAGACAACAAAGGAGAAAACAACTGCCTGTTACACCCCATGAGCCAGCATGGAATAGAGGACGAAGAAAGAGAAGTGCTGATATGGAAGTTTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACTGCATCCAGAGTACTATAAAGACTGCTGA" + } + ], "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]": [ { "name": "gag",