From 04596a70a5ab085cf6c8a73078ea72c8686aadf7 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 15:19:28 -0700 Subject: [PATCH] Rename "error" to "code" in defects.csv --- src/cfeintact/defect.py | 2 +- src/cfeintact/intact.py | 2 +- tests/expected-results-edgy/defects.json | 65 +- tests/expected-results-edgy/holistic.json | 17 +- tests/expected-results-edgy/regions.json | 159 +- tests/expected-results-large-csv/defects.csv | 261 - tests/expected-results-large-csv/holistic.csv | 41 - tests/expected-results-large-csv/regions.csv | 451 -- .../expected-results-large-csv/subtypes.fasta | 451 -- .../expected-results-large-hxb2/defects.json | 1640 +---- .../expected-results-large-hxb2/holistic.json | 577 +- .../expected-results-large-hxb2/regions.json | 6399 +---------------- tests/expected-results-large/defects.json | 1640 +---- tests/expected-results-large/holistic.json | 577 +- tests/expected-results-large/regions.json | 6399 +---------------- tests/expected-results-large/subtypes.fasta | 451 -- tests/expected-results-small-csv/defects.csv | 85 - tests/expected-results-small-csv/holistic.csv | 5 - tests/expected-results-small-csv/regions.csv | 55 - tests/expected-results-small/defects.json | 522 +- tests/expected-results-small/holistic.json | 73 +- tests/expected-results-small/regions.json | 783 +- 22 files changed, 14 insertions(+), 20641 deletions(-) diff --git a/src/cfeintact/defect.py b/src/cfeintact/defect.py index 07a8235..aa351e1 100644 --- a/src/cfeintact/defect.py +++ b/src/cfeintact/defect.py @@ -177,4 +177,4 @@ def __str__(self) -> str: @dataclass(frozen=True) class Defect: qseqid: str - error: DefectType + code: DefectType diff --git a/src/cfeintact/intact.py b/src/cfeintact/intact.py index 2a53ba0..3a263bb 100644 --- a/src/cfeintact/intact.py +++ b/src/cfeintact/intact.py @@ -624,7 +624,7 @@ def write(self, sequence, subtype, is_intact, regions, defects, holistic): defects_dicts = [{ "qseqid": d.qseqid, - "error": d.error.__class__.__name__, + "code": d.error.__class__.__name__, "message": str(d.error), "region": d.error.q.name if isinstance(d.error, ORFDefect) else None, } for d in defects] diff --git a/tests/expected-results-edgy/defects.json b/tests/expected-results-edgy/defects.json index a37cb1e..9e26dfe 100644 --- a/tests/expected-results-edgy/defects.json +++ b/tests/expected-results-edgy/defects.json @@ -1,64 +1 @@ -{ - "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455": [ - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "Frameshift", - "message": "ORF 'vpr' at 5559-5850 contains out of frame indels that impact 61 positions.", - "region": "vpr" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "InternalStop", - "message": "ORF 'vpr' at 5559-5850 contains an internal stop codon at 5793.", - "region": "vpr" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "MutatedStopCodon", - "message": "ORF 'vpr' has a mutated stop codon: 'GAC'.", - "region": "vpr" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "Deletion", - "message": "ORF 'vpu' exceeds maximum deletion tolerance. Contains 30 deletions with max tolerance of 6 deletions.", - "region": "vpu" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 2.00873 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "MutatedStartCodon", - "message": "ORF 'vpu' has a mutated start codon: 'ACG'.", - "region": "vpu" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8377-8469 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "Frameshift", - "message": "ORF 'nef' at 8797-9417 contains out of frame indels that impact 168 positions.", - "region": "nef" - }, - { - "qseqid": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455", - "error": "InternalStop", - "message": "ORF 'nef' at 8797-9417 contains an internal stop codon at 9166.", - "region": "nef" - } - ] -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-edgy/holistic.json b/tests/expected-results-edgy/holistic.json index f299676..9e26dfe 100644 --- a/tests/expected-results-edgy/holistic.json +++ b/tests/expected-results-edgy/holistic.json @@ -1,16 +1 @@ -{ - "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455": { - "intact": false, - "qlen": 9719, - "hypermutation_probablility": 0.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9719, - "blast_sseq_coverage": 1.130479522535501, - "blast_qseq_coverage": 1.1304660973351168, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - } -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-edgy/regions.json b/tests/expected-results-edgy/regions.json index b37557d..9e26dfe 100644 --- a/tests/expected-results-edgy/regions.json +++ b/tests/expected-results-edgy/regions.json @@ -1,158 +1 @@ -{ - "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455": [ - { - "region": "gag", - "start": 789, - "end": 2291, - "orientation": "forward", - "distance": 0.0, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2084, - "end": 5095, - "orientation": "forward", - "distance": 0.0, - "indel_impact": 0, - "protein": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5040, - "end": 5618, - "orientation": "forward", - "distance": 0.0, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5558, - "end": 5849, - "orientation": "forward", - "distance": 0.5468749999999982, - "indel_impact": 61, - "protein": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFQNWVST", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5830, - "end": 6044, - "orientation": "forward", - "distance": 0.0, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5969, - "end": 6044, - "orientation": "forward", - "distance": 0.0, - "indel_impact": 0, - "protein": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6061, - "end": 6309, - "orientation": "forward", - "distance": 2.0087301587301587, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKQ", - "nucleotides": "ACGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6224, - "end": 8794, - "orientation": "forward", - "distance": 0.003504672897196359, - "indel_impact": 0, - "protein": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL", - "nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8376, - "end": 8468, - "orientation": "forward", - "distance": 0.09677419354838701, - "indel_impact": 32, - "protein": "RPTSQPRGDPTGPKE", - "nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8377, - "end": 8652, - "orientation": "forward", - "distance": 0.0, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE", - "nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8796, - "end": 9416, - "orientation": "forward", - "distance": 0.8927184466019373, - "indel_impact": 168, - "protein": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPD", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ] -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-large-csv/defects.csv b/tests/expected-results-large-csv/defects.csv index cc0be84..6e1bdce 100644 --- a/tests/expected-results-large-csv/defects.csv +++ b/tests/expected-results-large-csv/defects.csv @@ -1,262 +1 @@ qseqid,error,message,region -KX505501.1,Frameshift,ORF 'pol' at 1629-1746 contains out of frame indels that impact 1950 positions.,pol -KX505501.1,Deletion,ORF 'pol' exceeds maximum deletion tolerance. Contains 2892 deletions with max tolerance of 93 deletions.,pol -KX505501.1,SequenceDivergence,ORF 'pol' exceeds maximum distance tolerance. It is 2.13586 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.88345.,pol -KX505501.1,MutatedStopCodon,ORF 'pol' has a mutated stop codon: 'T--'.,pol -KX505501.1,Frameshift,ORF 'env' at 1747-1746 contains out of frame indels that impact 1714 positions.,env -KX505501.1,Deletion,ORF 'env' exceeds maximum deletion tolerance. Contains 2346 deletions with max tolerance of 54 deletions.,env -KX505501.1,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 2.11186 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -KX505501.1,Frameshift,ORF 'vif' at 1747-1746 contains out of frame indels that impact 386 positions.,vif -KX505501.1,Deletion,ORF 'vif' exceeds maximum deletion tolerance. Contains 354 deletions with max tolerance of 12 deletions.,vif -KX505501.1,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 2.04883 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -KX505501.1,Frameshift,ORF 'vpr' at 1747-1746 contains out of frame indels that impact 194 positions.,vpr -KX505501.1,Deletion,ORF 'vpr' exceeds maximum deletion tolerance. Contains 66 deletions with max tolerance of 6 deletions.,vpr -KX505501.1,SequenceDivergence,ORF 'vpr' exceeds maximum distance tolerance. It is 2.00365 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.,vpr -KX505501.1,Frameshift,ORF 'tat_exon1' at 1747-1746 contains out of frame indels that impact 144 positions.,tat_exon1 -KX505501.1,Deletion,ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 69 deletions with max tolerance of 0 deletions.,tat_exon1 -KX505501.1,SequenceDivergence,ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.,tat_exon1 -KX505501.1,Frameshift,ORF 'rev_exon1' at 1747-1746 contains out of frame indels that impact 51 positions.,rev_exon1 -KX505501.1,Insertion,ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 69 insertions with max tolerance of 0 insertions.,rev_exon1 -KX505501.1,SequenceDivergence,ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.,rev_exon1 -KX505501.1,Frameshift,ORF 'vpu' at 1747-1746 contains out of frame indels that impact 166 positions.,vpu -KX505501.1,Deletion,ORF 'vpu' exceeds maximum deletion tolerance. Contains 24 deletions with max tolerance of 6 deletions.,vpu -KX505501.1,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 2.044 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -KX505501.1,Frameshift,ORF 'tat_exon2' at 1747-1746 contains out of frame indels that impact 62 positions.,tat_exon2 -KX505501.1,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 15 deletions with max tolerance of 0 deletions.,tat_exon2 -KX505501.1,SequenceDivergence,ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.,tat_exon2 -KX505501.1,Frameshift,ORF 'rev_exon2' at 1747-1746 contains out of frame indels that impact 184 positions.,rev_exon2 -KX505501.1,Deletion,ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 198 deletions with max tolerance of 7 deletions.,rev_exon2 -KX505501.1,SequenceDivergence,ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.,rev_exon2 -KX505501.1,Frameshift,ORF 'nef' at 1747-1778 contains out of frame indels that impact 425 positions.,nef -KX505501.1,Deletion,ORF 'nef' exceeds maximum deletion tolerance. Contains 396 deletions with max tolerance of 48 deletions.,nef -KX505501.1,SequenceDivergence,ORF 'nef' exceeds maximum distance tolerance. It is 2.0371 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.,nef -KX505501.1,MutatedStartCodon,ORF 'nef' has a mutated start codon: 'AGA'.,nef -KX505501.1,MutatedStopCodon,ORF 'nef' has a mutated stop codon: 'GAC'.,nef -KX505501.1,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions., -KX505501.1,LongDeletion,Query sequence contains a very large deletion., -KX505501.1,Scramble,Sequence is plus-scrambled., -MN691959,Frameshift,ORF 'vpu' at 5911-6156 contains out of frame indels that impact 122 positions.,vpu -MN691959,InternalStop,ORF 'vpu' at 5911-6156 contains an internal stop codon at 5974.,vpu -MN691959,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 1.70488 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -MN691959,Frameshift,ORF 'tat_exon2' at 8238-8330 contains out of frame indels that impact 32 positions.,tat_exon2 -MN691959,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.,tat_exon2 -MN692074,Frameshift,ORF 'pol' at 2085-4082 contains out of frame indels that impact 676 positions.,pol -MN692074,Deletion,ORF 'pol' exceeds maximum deletion tolerance. Contains 981 deletions with max tolerance of 93 deletions.,pol -MN692074,MutatedStopCodon,ORF 'pol' has a mutated stop codon: 'GAT'.,pol -MN692074,Frameshift,ORF 'env' at 4083-4082 contains out of frame indels that impact 1714 positions.,env -MN692074,Deletion,ORF 'env' exceeds maximum deletion tolerance. Contains 2073 deletions with max tolerance of 54 deletions.,env -MN692074,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 2.05571 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -MN692074,Frameshift,ORF 'vif' at 4083-4082 contains out of frame indels that impact 386 positions.,vif -MN692074,Deletion,ORF 'vif' exceeds maximum deletion tolerance. Contains 81 deletions with max tolerance of 12 deletions.,vif -MN692074,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 1.99787 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -MN692074,Frameshift,ORF 'vpr' at 4083-4082 contains out of frame indels that impact 194 positions.,vpr -MN692074,Insertion,ORF 'vpr' exceeds maximum insertion tolerance. Contains 207 insertions with max tolerance of 6 insertions.,vpr -MN692074,SequenceDivergence,ORF 'vpr' exceeds maximum distance tolerance. It is 1.9951 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.,vpr -MN692074,Frameshift,ORF 'tat_exon1' at 4083-4082 contains out of frame indels that impact 144 positions.,tat_exon1 -MN692074,Insertion,ORF 'tat_exon1' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 0 insertions.,tat_exon1 -MN692074,SequenceDivergence,ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.,tat_exon1 -MN692074,Frameshift,ORF 'rev_exon1' at 4083-4082 contains out of frame indels that impact 51 positions.,rev_exon1 -MN692074,Insertion,ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 387 insertions with max tolerance of 0 insertions.,rev_exon1 -MN692074,SequenceDivergence,ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.,rev_exon1 -MN692074,Frameshift,ORF 'vpu' at 4083-4082 contains out of frame indels that impact 166 positions.,vpu -MN692074,Insertion,ORF 'vpu' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 24 insertions.,vpu -MN692074,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 2.00052 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -MN692074,Frameshift,ORF 'tat_exon2' at 4083-4082 contains out of frame indels that impact 62 positions.,tat_exon2 -MN692074,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 60 deletions with max tolerance of 0 deletions.,tat_exon2 -MN692074,SequenceDivergence,ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.,tat_exon2 -MN692074,Frameshift,ORF 'rev_exon2' at 4083-4082 contains out of frame indels that impact 184 positions.,rev_exon2 -MN692074,Deletion,ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 243 deletions with max tolerance of 7 deletions.,rev_exon2 -MN692074,SequenceDivergence,ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.,rev_exon2 -MN692074,Frameshift,ORF 'nef' at 4083-4086 contains out of frame indels that impact 412 positions.,nef -MN692074,Deletion,ORF 'nef' exceeds maximum deletion tolerance. Contains 123 deletions with max tolerance of 48 deletions.,nef -MN692074,SequenceDivergence,ORF 'nef' exceeds maximum distance tolerance. It is 2.00372 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.,nef -MN692074,MutatedStartCodon,ORF 'nef' has a mutated start codon: 'ACC'.,nef -MN692074,MutatedStopCodon,ORF 'nef' has a mutated stop codon: 'CAG'.,nef -MN692074,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions., -MN692074,LongDeletion,Query sequence contains a very large deletion., -MN090335,Frameshift,ORF 'gag' at 482-1665 contains out of frame indels that impact 1230 positions.,gag -MN090335,InternalStop,ORF 'gag' at 482-1665 contains an internal stop codon at 683.,gag -MN090335,MutatedStartCodon,ORF 'gag' has a mutated start codon: 'ATA'.,gag -MN090335,MutatedStopCodon,ORF 'gag' has a mutated stop codon: 'AAA'.,gag -MN090335,Frameshift,ORF 'tat_exon2' at 7740-7832 contains out of frame indels that impact 32 positions.,tat_exon2 -MN090335,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.,tat_exon2 -MN090335,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 42 deletions with max tolerance of 10 deletions., -MN090335,MajorSpliceDonorSiteMutated,Query sequence has a mutated splice donor site: AT. The context is TTAACTGCGAAT-----CGTTC., -MN090335,Scramble,Sequence is minus-scrambled., -MN090335,InternalInversion,Sequence contains an internal inversion., -MN090376,Frameshift,ORF 'gag' at 541-1590 contains out of frame indels that impact 754 positions.,gag -MN090376,InternalStop,ORF 'gag' at 541-1590 contains an internal stop codon at 598.,gag -MN090376,SequenceDivergence,ORF 'gag' exceeds maximum distance tolerance. It is 1.33506 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.,gag -MN090376,MutatedStartCodon,ORF 'gag' has a mutated start codon: 'TGC'.,gag -MN090376,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 95 deletions with max tolerance of 10 deletions., -MN090376,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is ----------------------., -MN090376,Scramble,Sequence is minus-scrambled., -MN090376,InternalInversion,Sequence contains an internal inversion., -MK115690.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 12 deletions with max tolerance of 10 deletions., -MK115690.1,MajorSpliceDonorSiteMutated,Query sequence has a mutated splice donor site: G-. The context is GCGGCGACTGG-----------., -MK115690.1,Scramble,Sequence is plus-scrambled., -MK115571.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 79 deletions with max tolerance of 10 deletions., -MK115571.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is ----------------------., -MK115514.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is GCGGCGACT--------CGAAA., -MK115488.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is GCGGCGACT--------CGAAA., -MK115030.1,Scramble,Sequence is minus-scrambled., -MK115030.1,InternalInversion,Sequence contains an internal inversion., -MK115211.1,Frameshift,ORF 'env' at 5688-8198 contains out of frame indels that impact 757 positions.,env -MK115211.1,InternalStop,ORF 'env' at 5688-8198 contains an internal stop codon at 6354.,env -MK115211.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions., -MK115211.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is GCGGCG----------------., -MK115211.1,Scramble,Sequence is minus-scrambled., -MK115211.1,InternalInversion,Sequence contains an internal inversion., -MK115158.1,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 15 deletions with max tolerance of 0 deletions.,tat_exon2 -MK115158.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions., -MK115158.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is GCGGCGAACG------------., -MK114705.1,Frameshift,ORF 'nef' at 8551-9118 contains out of frame indels that impact 122 positions.,nef -MK114705.1,InternalStop,ORF 'nef' at 8551-9118 contains an internal stop codon at 8983.,nef -MK114705.1,MutatedStopCodon,ORF 'nef' has a mutated stop codon: 'ACG'.,nef -MK114856.1,Frameshift,ORF 'gag' at 493-2022 contains out of frame indels that impact 828 positions.,gag -MK114856.1,InternalStop,ORF 'gag' at 493-2022 contains an internal stop codon at 538.,gag -MK114856.1,SequenceDivergence,ORF 'gag' exceeds maximum distance tolerance. It is 1.871 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.,gag -MK114856.1,MutatedStartCodon,ORF 'gag' has a mutated start codon: 'ATA'.,gag -MK114856.1,Frameshift,ORF 'pol' at 1788-4826 contains out of frame indels that impact 1998 positions.,pol -MK114856.1,InternalStop,ORF 'pol' at 1788-4826 contains an internal stop codon at 1917.,pol -MK114856.1,Frameshift,ORF 'env' at 5954-8521 contains out of frame indels that impact 1453 positions.,env -MK114856.1,InternalStop,ORF 'env' at 5954-8521 contains an internal stop codon at 6056.,env -MK114856.1,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 1.89012 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -MK114856.1,Frameshift,ORF 'vif' at 4771-5349 contains out of frame indels that impact 328 positions.,vif -MK114856.1,InternalStop,ORF 'vif' at 4771-5349 contains an internal stop codon at 4882.,vif -MK114856.1,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 1.92813 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -MK114856.1,MutatedStartCodon,ORF 'vif' has a mutated start codon: 'ATA'.,vif -MK114856.1,Frameshift,ORF 'vpr' at 5289-5579 contains out of frame indels that impact 143 positions.,vpr -MK114856.1,InternalStop,ORF 'vpr' at 5289-5579 contains an internal stop codon at 5340.,vpr -MK114856.1,SequenceDivergence,ORF 'vpr' exceeds maximum distance tolerance. It is 1.70619 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.,vpr -MK114856.1,MutatedStartCodon,ORF 'vpr' has a mutated start codon: 'ATA'.,vpr -MK114856.1,Frameshift,ORF 'tat_exon1' at 5560-5774 contains out of frame indels that impact 123 positions.,tat_exon1 -MK114856.1,Deletion,ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 186 deletions with max tolerance of 0 deletions.,tat_exon1 -MK114856.1,MutatedStartCodon,ORF 'tat_exon1' has a mutated start codon: 'ATA'.,tat_exon1 -MK114856.1,Frameshift,ORF 'vpu' at 5791-6039 contains out of frame indels that impact 122 positions.,vpu -MK114856.1,InternalStop,ORF 'vpu' at 5791-6039 contains an internal stop codon at 5857.,vpu -MK114856.1,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 1.77195 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -MK114856.1,Frameshift,ORF 'tat_exon2' at 8103-8195 contains out of frame indels that impact 37 positions.,tat_exon2 -MK114856.1,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.,tat_exon2 -MK114856.1,Frameshift,ORF 'rev_exon2' at 8104-8379 contains out of frame indels that impact 69 positions.,rev_exon2 -MK114856.1,InternalStop,ORF 'rev_exon2' at 8104-8379 contains an internal stop codon at 8161.,rev_exon2 -MK114856.1,SequenceDivergence,ORF 'rev_exon2' exceeds maximum distance tolerance. It is 0.84783 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.,rev_exon2 -MK114856.1,Frameshift,ORF 'nef' at 8523-9176 contains out of frame indels that impact 403 positions.,nef -MK114856.1,InternalStop,ORF 'nef' at 8523-9176 contains an internal stop codon at 8724.,nef -MK114856.1,SequenceDivergence,ORF 'nef' exceeds maximum distance tolerance. It is 1.95 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.,nef -MK114856.1,MutatedStartCodon,ORF 'nef' has a mutated start codon: 'ATA'.,nef -MK114856.1,APOBECHypermutation,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 3.639064030015132e-65)., -MK115009.1,Frameshift,ORF 'gag' at 303-1821 contains out of frame indels that impact 844 positions.,gag -MK115009.1,InternalStop,ORF 'gag' at 303-1821 contains an internal stop codon at 348.,gag -MK115009.1,SequenceDivergence,ORF 'gag' exceeds maximum distance tolerance. It is 1.8444 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.,gag -MK115009.1,MutatedStopCodon,ORF 'gag' has a mutated stop codon: 'AAA'.,gag -MK115009.1,Frameshift,ORF 'pol' at 1614-4625 contains out of frame indels that impact 1728 positions.,pol -MK115009.1,InternalStop,ORF 'pol' at 1614-4625 contains an internal stop codon at 1713.,pol -MK115009.1,Frameshift,ORF 'env' at 5753-8353 contains out of frame indels that impact 1437 positions.,env -MK115009.1,InternalStop,ORF 'env' at 5753-8353 contains an internal stop codon at 5849.,env -MK115009.1,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 1.87886 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -MK115009.1,Frameshift,ORF 'vif' at 4570-5148 contains out of frame indels that impact 166 positions.,vif -MK115009.1,InternalStop,ORF 'vif' at 4570-5148 contains an internal stop codon at 4630.,vif -MK115009.1,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 1.99021 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -MK115009.1,MutatedStartCodon,ORF 'vif' has a mutated start codon: 'ATA'.,vif -MK115009.1,Frameshift,ORF 'vpr' at 5088-5378 contains out of frame indels that impact 88 positions.,vpr -MK115009.1,InternalStop,ORF 'vpr' at 5088-5378 contains an internal stop codon at 5247.,vpr -MK115009.1,SequenceDivergence,ORF 'vpr' exceeds maximum distance tolerance. It is 1.28021 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.,vpr -MK115009.1,Frameshift,ORF 'tat_exon1' at 5359-5573 contains out of frame indels that impact 35 positions.,tat_exon1 -MK115009.1,Deletion,ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 54 deletions with max tolerance of 0 deletions.,tat_exon1 -MK115009.1,MutatedStartCodon,ORF 'tat_exon1' has a mutated start codon: 'ATA'.,tat_exon1 -MK115009.1,InternalStop,ORF 'vpu' at 5590-5835 contains an internal stop codon at 5815.,vpu -MK115009.1,Frameshift,ORF 'rev_exon2' at 7936-8211 contains out of frame indels that impact 70 positions.,rev_exon2 -MK115009.1,InternalStop,ORF 'rev_exon2' at 7936-8211 contains an internal stop codon at 7993.,rev_exon2 -MK115009.1,SequenceDivergence,ORF 'rev_exon2' exceeds maximum distance tolerance. It is 0.78261 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.,rev_exon2 -MK115009.1,Frameshift,ORF 'nef' at 8355-8996 contains out of frame indels that impact 395 positions.,nef -MK115009.1,InternalStop,ORF 'nef' at 8355-8996 contains an internal stop codon at 8796.,nef -MK115009.1,SequenceDivergence,ORF 'nef' exceeds maximum distance tolerance. It is 1.70049 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.,nef -MK115009.1,MutatedStartCodon,ORF 'nef' has a mutated start codon: 'ATA'.,nef -MK115009.1,APOBECHypermutation,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 1.2040967664149076e-41)., -MK115009.1,Scramble,Sequence is minus-scrambled., -MK115009.1,InternalInversion,Sequence contains an internal inversion., -MK115387.1,Frameshift,ORF 'tat_exon2' at 7870-7962 contains out of frame indels that impact 32 positions.,tat_exon2 -MK115387.1,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.,tat_exon2 -MK116110.1,Frameshift,ORF 'gag' at 118-1601 contains out of frame indels that impact 973 positions.,gag -MK116110.1,InternalStop,ORF 'gag' at 118-1601 contains an internal stop codon at 235.,gag -MK116110.1,SequenceDivergence,ORF 'gag' exceeds maximum distance tolerance. It is 1.73094 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.,gag -MK116110.1,MutatedStartCodon,ORF 'gag' has a mutated start codon: 'GCT'.,gag -MK116110.1,MutatedStopCodon,ORF 'gag' has a mutated stop codon: 'AAA'.,gag -MK116110.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 22 deletions with max tolerance of 10 deletions., -MK116110.1,MajorSpliceDonorSiteMutated,Query sequence has a mutated splice donor site: CC. The context is CCCCCCACTCCCTGACATGCTG., -MK114997.1,Frameshift,ORF 'env' at 5651-8207 contains out of frame indels that impact 1167 positions.,env -MK114997.1,InternalStop,ORF 'env' at 5651-8207 contains an internal stop codon at 6695.,env -MK114997.1,MutatedStopCodon,ORF 'env' has a mutated stop codon: 'AGA'.,env -MK115065.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 75 deletions with max tolerance of 10 deletions., -MK115065.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is ----------------------., -MK115065.1,Scramble,Sequence is plus-scrambled., -MK115065.1,InternalInversion,Sequence contains an internal inversion., -MK115464.1,Frameshift,ORF 'gag' at 795-2297 contains out of frame indels that impact 637 positions.,gag -MK115464.1,InternalStop,ORF 'gag' at 795-2297 contains an internal stop codon at 1257.,gag -MK115464.1,SequenceDivergence,ORF 'gag' exceeds maximum distance tolerance. It is 1.495 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.,gag -MK115464.1,MutatedStartCodon,ORF 'gag' has a mutated start codon: 'ATA'.,gag -MK115464.1,Frameshift,ORF 'pol' at 2090-5101 contains out of frame indels that impact 1716 positions.,pol -MK115464.1,InternalStop,ORF 'pol' at 2090-5101 contains an internal stop codon at 2189.,pol -MK115464.1,Frameshift,ORF 'env' at 6229-8799 contains out of frame indels that impact 1449 positions.,env -MK115464.1,InternalStop,ORF 'env' at 6229-8799 contains an internal stop codon at 6430.,env -MK115464.1,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 1.83161 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -MK115464.1,Frameshift,ORF 'vif' at 5046-5624 contains out of frame indels that impact 248 positions.,vif -MK115464.1,InternalStop,ORF 'vif' at 5046-5624 contains an internal stop codon at 5253.,vif -MK115464.1,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 1.49427 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -MK115464.1,Frameshift,ORF 'tat_exon1' at 5835-6049 contains out of frame indels that impact 129 positions.,tat_exon1 -MK115464.1,Deletion,ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 195 deletions with max tolerance of 0 deletions.,tat_exon1 -MK115464.1,MutatedStartCodon,ORF 'tat_exon1' has a mutated start codon: 'ATA'.,tat_exon1 -MK115464.1,Frameshift,ORF 'vpu' at 6066-6311 contains out of frame indels that impact 124 positions.,vpu -MK115464.1,InternalStop,ORF 'vpu' at 6066-6311 contains an internal stop codon at 6132.,vpu -MK115464.1,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 1.88171 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -MK115464.1,Frameshift,ORF 'tat_exon2' at 8381-8473 contains out of frame indels that impact 37 positions.,tat_exon2 -MK115464.1,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.,tat_exon2 -MK115464.1,Frameshift,ORF 'rev_exon2' at 8382-8657 contains out of frame indels that impact 45 positions.,rev_exon2 -MK115464.1,InternalStop,ORF 'rev_exon2' at 8382-8657 contains an internal stop codon at 8439.,rev_exon2 -MK115464.1,Frameshift,ORF 'nef' at 8801-9451 contains out of frame indels that impact 178 positions.,nef -MK115464.1,Deletion,ORF 'nef' exceeds maximum deletion tolerance. Contains 234 deletions with max tolerance of 48 deletions.,nef -MK115464.1,MutatedStartCodon,ORF 'nef' has a mutated start codon: 'ATA'.,nef -MK115464.1,APOBECHypermutation,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 4.937891251407691e-23)., -MK115520.1,Frameshift,ORF 'pol' at 1988-5004 contains out of frame indels that impact 1225 positions.,pol -MK115520.1,InternalStop,ORF 'pol' at 1988-5004 contains an internal stop codon at 3188.,pol -MK115520.1,MutatedStopCodon,ORF 'pol' has a mutated stop codon: 'AGA'.,pol -MK115520.1,MajorSpliceDonorSiteMutated,Query sequence has a mutated splice donor site: GA. The context is CTGGTAACTAGAGATCGAAAGT., -MK115520.1,InternalInversion,Sequence contains an internal inversion., -MK115570.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 11 deletions with max tolerance of 10 deletions., -MK115570.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is GCGGCG-----------CGAAA., -MK115702.1,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 54 deletions with max tolerance of 10 deletions., -MK115702.1,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is ---------------------A., -MK115702.1,Scramble,Sequence is plus-scrambled., -MK115702.1,InternalInversion,Sequence contains an internal inversion., -MK115095.1,Frameshift,ORF 'gag' at 189-1697 contains out of frame indels that impact 806 positions.,gag -MK115095.1,InternalStop,ORF 'gag' at 189-1697 contains an internal stop codon at 234.,gag -MK115095.1,SequenceDivergence,ORF 'gag' exceeds maximum distance tolerance. It is 1.8384 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.,gag -MK115095.1,MutatedStartCodon,ORF 'gag' has a mutated start codon: 'ATA'.,gag -MK115095.1,Frameshift,ORF 'pol' at 1490-4501 contains out of frame indels that impact 1865 positions.,pol -MK115095.1,InternalStop,ORF 'pol' at 1490-4501 contains an internal stop codon at 1589.,pol -MK115095.1,Frameshift,ORF 'env' at 5629-8229 contains out of frame indels that impact 1426 positions.,env -MK115095.1,InternalStop,ORF 'env' at 5629-8229 contains an internal stop codon at 5725.,env -MK115095.1,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 1.87353 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -MK115095.1,Frameshift,ORF 'vif' at 4446-5024 contains out of frame indels that impact 332 positions.,vif -MK115095.1,InternalStop,ORF 'vif' at 4446-5024 contains an internal stop codon at 4557.,vif -MK115095.1,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 1.85469 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -MK115095.1,Frameshift,ORF 'tat_exon1' at 5235-5449 contains out of frame indels that impact 35 positions.,tat_exon1 -MK115095.1,Deletion,ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 54 deletions with max tolerance of 0 deletions.,tat_exon1 -MK115095.1,MutatedStartCodon,ORF 'tat_exon1' has a mutated start codon: 'ATA'.,tat_exon1 -MK115095.1,Frameshift,ORF 'vpu' at 5466-5711 contains out of frame indels that impact 122 positions.,vpu -MK115095.1,InternalStop,ORF 'vpu' at 5466-5711 contains an internal stop codon at 5532.,vpu -MK115095.1,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 1.88171 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -MK115095.1,Frameshift,ORF 'rev_exon2' at 7812-8087 contains out of frame indels that impact 69 positions.,rev_exon2 -MK115095.1,InternalStop,ORF 'rev_exon2' at 7812-8087 contains an internal stop codon at 7869.,rev_exon2 -MK115095.1,APOBECHypermutation,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 8.33506116803153e-40)., -OQ092466,MajorSpliceDonorSiteMutated,Query sequence has a mutated splice donor site: GC. The context is GCGGCGACTGGCGAGTACGCCA., -OQ092463,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 26 deletions with max tolerance of 10 deletions., -OQ092463,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCC-., -OQ092465,MajorSpliceDonorSiteMutated,Query sequence has a mutated splice donor site: GA. The context is GCGGCGACTGGAGAGTACGCCT., -OQ092462,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 23 deletions with max tolerance of 10 deletions., -OQ092462,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCCA., -OQ092464,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 19 deletions with max tolerance of 10 deletions., -OQ092464,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is GCGGCG----------------., -OQ092467,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 22 deletions with max tolerance of 10 deletions., -OQ092467,MajorSpliceDonorSiteMutated,Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCCA., diff --git a/tests/expected-results-large-csv/holistic.csv b/tests/expected-results-large-csv/holistic.csv index 5db8bb8..1c67e33 100644 --- a/tests/expected-results-large-csv/holistic.csv +++ b/tests/expected-results-large-csv/holistic.csv @@ -1,42 +1 @@ qseqid,intact,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs,is_reverse_complement -KX505501.1,False,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,1997,0.2498456472525211,1.2158237356034052,0.17666166916541728,789,8793,4,False -MN691959,False,9493,0.19667690182893238,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9493,1.0817040543321672,1.1086063415148004,1.0,789,8793,3,False -MN692074,False,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,4178,0.5042189750977567,1.1728099569171853,0.411544227886057,789,8793,4,False -MN692145,True,9689,0.1672411051048176,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9689,1.130479522535501,1.1271545051088863,1.0,789,8793,3,False -MN090335,False,9069,0.1771850809736527,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9069,0.9842560197571517,1.0603153600176425,1.000374812593703,789,8793,3,False -MN090376,False,8985,0.026415767987601813,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,8985,0.9784935171846059,1.0604340567612687,0.9943778110944528,789,8793,3,False -MK115581.1,True,9495,0.6919440876652894,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9495,0.987034369211772,1.0046340179041602,1.0,789,8793,2,False -MK115690.1,False,9689,0.051230576250981485,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9689,1.004630582424367,1.0113530808132933,1.0,789,8793,3,False -MK115571.1,False,9394,0.8029570594372466,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9394,0.9866227618851615,1.0113902490951672,1.0,789,8793,2,False -MK115514.1,False,9382,0.6482462132632603,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9382,0.9864169582218564,1.0173736943082499,1.0,789,8793,2,False -MK115488.1,False,9623,0.6534999185838631,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9623,1.0255196542498457,1.0325262392185388,1.0,789,8793,6,False -MK115030.1,False,9126,0.032014462397289556,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9126,0.994031693764149,1.0655270655270654,1.0,789,8793,3,False -MK115498.1,True,9461,0.83547963060225,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9461,0.9866227618851615,1.0080329774865235,1.0,789,8793,2,False -MK115211.1,False,9032,0.11818291879607423,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9032,0.995060712080675,1.0598981399468557,1.0,789,8793,3,False -MK115158.1,False,9143,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1.0,234,8211,1,False -MK114705.1,False,9411,0.14584270737492833,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9411,1.098065445564931,1.122622463075125,1.0,789,8793,6,False -MK114856.1,False,9477,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9477,1.0485696645400289,1.0812493405085997,1.0,789,8793,4,False -MK115009.1,False,9207,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9207,0.9965013377238114,1.0590854784403172,1.0,789,8793,3,False -MK115387.1,False,9136,0.5436355526687852,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9136,0.9796254373327845,1.040936952714536,1.0,789,8793,2,False -MK115491.1,True,9422,0.8961809048805741,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9422,1.0037044659394938,1.0299299511780937,1.0,789,8793,2,False -MK116110.1,False,8967,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,0.9986220719027934,140,8123,3,False -MK115527.1,True,9481,0.770862998910788,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9481,0.9867256637168141,1.0056956017297753,1.0,789,8793,2,False -MK114997.1,False,9055,0.05560625344150194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9055,0.9784935171846059,1.0516841524019878,1.0,789,8793,2,False -MK115518.1,True,9537,0.6408152618300496,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9537,0.9847705289154147,0.9996854356715948,1.0,789,8793,3,False -MK115065.1,False,9214,0.033954952452739495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9214,1.0080263428689031,1.069459518124593,1.0,789,8793,6,False -MK115464.1,False,9663,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9663,0.9838444124305412,0.9893407844354756,1.0,789,8793,2,False -MK115530.1,True,9544,0.5812621948015355,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9544,0.9866227618851615,0.9992665549036044,1.0,789,8793,2,False -MK115520.1,False,9589,0.5225247969864292,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9589,0.9786993208479111,0.987902805297737,1.012118940529735,789,8793,3,False -MK115503.1,True,9617,0.42870398270204335,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9617,0.987034369211772,0.9953207861079338,1.0,789,8793,2,False -MK115570.1,False,9485,0.7406166892211931,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9485,0.9866227618851615,1.0057986294148655,1.0,789,8793,2,False -MK115509.1,True,9353,0.7883768413537747,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9353,0.9866227618851615,1.0197797498128942,1.0,789,8793,2,False -MK115702.1,False,9098,0.1454272422215308,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9098,0.9874459765383824,1.0596834469114091,1.0198650674662668,789,8793,4,False -MK115095.1,False,9137,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9137,0.9907388351512657,1.060085367188355,1.0,789,8793,2,False -MK115490.1,True,9347,0.8875203448314265,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9347,0.9848734307470673,1.0204343639670483,1.0,789,8793,3,False -MK115576.1,True,9266,0.8197662757563093,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9266,0.9917678534677917,1.0342110943233327,1.0,789,8793,3,False -OQ092466,False,9686,0.3858261890626,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9686,1.1015641078411196,1.1192442700805285,1.0,789,8793,3,False -OQ092463,False,9605,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,1.0,140,8123,2,False -OQ092465,False,9659,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,1.0,200,8207,2,False -OQ092462,False,9714,0.10883643311676816,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9714,1.1306853261988064,1.1301214741610048,1.0,789,8793,3,False -OQ092464,False,9556,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,1.0,200,8207,2,False -OQ092467,False,9936,0.6438715160567257,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9936,1.1308911298621116,1.0962157809983897,1.0,789,8793,3,False diff --git a/tests/expected-results-large-csv/regions.csv b/tests/expected-results-large-csv/regions.csv index 2803f62..ef28202 100644 --- a/tests/expected-results-large-csv/regions.csv +++ b/tests/expected-results-large-csv/regions.csv @@ -1,452 +1 @@ qseqid,region,start,end,orientation,distance,indel_impact,protein,nucleotides,subtype_start,subtype_end,subtype_aminoacids,subtype_nucleotides -KX505501.1,gag,336,1745,forward,0.41298449612403343,69,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -KX505501.1,pol,1628,1745,forward,2.1358565737051802,1950,FCRENLAFPQGKAGEFPSEQTRANSPTSRELQVWGRDTN,TTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -KX505501.1,env,1746,1745,forward,2.1118604651162807,1714,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -KX505501.1,vif,1746,1745,forward,2.0488262910798123,386,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -KX505501.1,vpr,1746,1745,forward,2.0036496350364965,194,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -KX505501.1,tat_exon1,1746,1745,forward,2.218055555555555,144,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT,,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -KX505501.1,rev_exon1,1746,1745,forward,2.2499999999999996,51,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT,,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -KX505501.1,vpu,1746,1745,forward,2.044,166,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -KX505501.1,tat_exon2,1746,1745,forward,2.2419354838709675,62,RCIRSTTRTADTELFTRDFPLGTFQ,,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -KX505501.1,rev_exon2,1746,1745,forward,2.214130434782609,184,RCIRSTTRTADTELFTRDFPLGTFQ,,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -KX505501.1,nef,1746,1777,forward,2.03710407239819,425,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,AGATGCATCCGGAGTACTACAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN691959,gag,639,2141,forward,0.05400000000000005,0,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN691959,pol,1934,4945,forward,0.03585657370517925,0,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN691959,vif,4890,5468,forward,0.0625,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN691959,vpr,5408,5698,forward,0.0625,0,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN691959,tat_exon1,5679,5893,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN691959,rev_exon1,5818,5893,forward,0.46153846153846145,0,MAGRSGDSDEDLLKTVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN691959,vpu,5910,6155,forward,1.704878048780488,122,MQPIQIAIVALVVAIIIAIVV,ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN691959,env,6070,8655,forward,0.10139372822299642,0,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL,ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN691959,tat_exon2,8237,8329,forward,0.19354838709677424,32,RPTSQTRGDPTGPKE,AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN691959,rev_exon2,8238,8513,forward,0.16304347826086962,0,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE,GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN691959,nef,8657,9277,forward,0.043689320388349495,0,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN692074,gag,789,2291,forward,0.14990059642147102,0,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692074,pol,2084,4081,forward,0.8050695825049854,676,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGAT,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN692074,env,4082,4081,forward,2.055707762557078,1714,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN692074,vif,4082,4081,forward,1.997872340425532,386,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN692074,vpr,4082,4081,forward,1.9950980392156863,194,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN692074,tat_exon1,4082,4081,forward,2.218055555555555,144,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD,,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692074,rev_exon1,4082,4081,forward,2.2499999999999996,51,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD,,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692074,vpu,4082,4081,forward,2.0005208333333333,166,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN692074,tat_exon2,4082,4081,forward,2.2419354838709675,62,TQWRALRCCI,,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN692074,rev_exon2,4082,4081,forward,2.214130434782609,184,TQWRALRCCI,,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN692074,nef,4082,4085,forward,2.0037162162162163,412,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,ACCC,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN692145,gag,775,2280,forward,0.16267465069860276,0,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692145,pol,2070,5084,forward,0.10796019900497522,0,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN692145,vif,5029,5607,forward,0.21875,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN692145,vpr,5547,5837,forward,0.34375,0,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN692145,tat_exon1,5818,6032,forward,0.25,0,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKX,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692145,rev_exon1,5957,6032,forward,0.34615384615384626,0,MAGRSGDSDEELLKTVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692145,vpu,6049,6297,forward,0.6733333333333333,0,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL,ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN692145,env,6212,8782,forward,0.5647651006711409,0,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN692145,tat_exon2,8364,8456,forward,0.3870967741935485,0,RPASQPRGDPTGPKESKKKVERETETDPLH,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN692145,rev_exon2,8365,8640,forward,0.26086956521739135,0,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN692145,nef,8784,9386,forward,0.40765550239234427,0,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN090335,gag,481,1664,forward,0.9171874999999998,1230,MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ,ATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN090335,pol,1427,4468,forward,0.23952802359881997,0,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN090335,vif,4413,4991,forward,0.34375,0,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN090335,vpr,4931,5221,forward,0.38144329896907214,0,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN090335,tat_exon1,5202,5416,forward,0.41666666666666674,0,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKX,ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN090335,rev_exon1,5341,5416,forward,0.5769230769230769,0,MAGRSGDRDEDLLKTVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN090335,vpu,5433,5681,forward,0.6837209302325582,0,MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL,ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN090335,env,5596,8157,forward,0.6589887640449441,31,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ,ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN090335,tat_exon2,7739,7831,forward,0.4838709677419355,32,RPSSQPRGDQTGPKE,AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN090335,rev_exon2,7740,8015,forward,0.4565217391304348,0,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE,GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN090335,nef,8159,8812,forward,0.5935483870967742,0,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN090376,gag,540,1589,forward,1.3350597609561752,754,MYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP,TGCTACTGTATTAAATAATGATTTAAGTTCCTCTGATCCTGTCTGAAGTGCTGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCCAGTTCCCTGCTTGCCCATACTATATGTTTTAACTTATATCTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATTTGTCTAATTCTCCCCCGCTTAATACCGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAAGGCGTACTCACCGTTCGCCGCCCCTCGCCTCTTGCTGTGCGCGCTTCAGCAAGCCGAGTCCGATAATTCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN090376,pol,1382,4393,forward,0.1952380952380952,0,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTGACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGATGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCTGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTCGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN090376,vif,4338,4919,forward,0.4578680203045684,0,MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAGCATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGACTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAAAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN090376,vpr,4859,5149,forward,0.35051546391752586,0,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,ATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCTTGGCTTCATGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN090376,tat_exon1,5130,5344,forward,0.5416666666666667,0,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKX,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN090376,rev_exon1,5269,5344,forward,0.7517241379310344,0,MAGRSGDSDEELLRIAGTIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN090376,vpu,5361,5630,forward,0.8152173913043479,0,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL,ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN090376,env,5524,8109,forward,0.6166294642857151,0,MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAGGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAGCCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTAACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTAACATCAATAGCACTAATATAAACAATACCAATAGTATAGAAAGAGAAATGACAAACTGCTCTTTTAATGTCACCACAGTCATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAAACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGCTATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTAAAAATGTTAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAACACAGAAGTAAATATTATCACACTCCCATGCAAGATAAGGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACATTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN090376,tat_exon2,7691,7783,forward,0.4838709677419355,0,RPSSQPRGDPTGPKESEKKVERETETDPVT,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN090376,rev_exon2,7692,7967,forward,0.4565217391304348,0,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN090376,nef,8111,8734,forward,0.6291866028708133,0,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACATCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115581.1,gag,680,2179,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115581.1,pol,1972,4983,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115581.1,vif,4928,5506,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115581.1,vpr,5446,5736,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115581.1,tat_exon1,5717,5931,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115581.1,rev_exon1,5856,5931,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115581.1,vpu,5948,6193,forward,0.727586206896552,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115581.1,env,6111,8651,forward,0.5241695303550973,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115581.1,tat_exon2,8233,8325,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115581.1,rev_exon2,8234,8509,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115581.1,nef,8653,9267,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115690.1,gag,777,2285,forward,0.19661354581673307,0,MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAGTGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAGATTAAAGCATATCGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATAATGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTGTATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGGCGCAGGAAACAGCAGTCAGACCAGCACCAGCCAAAATTACCCTATAGTACAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTCTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACTAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCCACCAGCATTCTAGACATAAGACAAGGACCAAAGGAGCCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTAGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAGGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGTCATAAAGCAAGAGTTTTAGCGGAAGCAATGAGCCAAGCAACAAATTCAGCTGCCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAAAGTGTTAAGTGTTTTAATTGCGGCAAAGATGGGCACATAGCAAAAAATTGCAGGGCCCCTAGAAGAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTCCAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115690.1,pol,2078,5089,forward,0.14129353233830844,0,FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGCACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATAGGGCCTGAGAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGACTTCAGAGAACTAAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGACAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAACCTTATAGAAAACAAAATCCAAACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACCTAGAAATAGGGCAGCATAGAATAAAAATAGAAGAACTGAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGACAAGTGGACAGTACAGCCTATAGCGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATCTACCCAGGAATTAAAGTAAGGCAATTATGTAAACTACTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAATTAGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGAGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTGACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAATCAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTATCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACATGCAATTTATCTAGCTTTGCAAGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTGTTTTTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAGCAGCATACTTTATTTTAAAATTAGCAGGACGATGGCCAGTAGCAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACAGTTAAGGCCGCCTGCTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAACAATGAATTGAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACGTAATAGCAACAGACTTACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAACATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115690.1,vif,5034,5612,forward,0.4093264248704662,0,MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAAGATGGCACAGTTTAGTAAAACACCATATATATATTTCAGGGAAAGCTAGAGGATGGGTTTATAAACATCACTATGAAAACACTCATCCAAGAATAAGTTCAAAAGTATACATCCCACTAGGGGAAGCTAGACTGGCAGTAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGGAATATAGCACACAAGTAGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGTCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTTTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTATTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115690.1,vpr,5552,5842,forward,0.375,0,MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS,ATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACACATCTATGAGACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAAGTCTGCAACAACTGCTGTTCATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGAATTACTCCACAGAGGAGAACAAGAAATGGAGCCAGTAGATCCTAA,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115690.1,tat_exon1,5823,6037,forward,0.41666666666666674,0,MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKX,ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115690.1,rev_exon1,5962,6037,forward,0.5769230769230769,0,MAGRSGDNDEDLLKTVRFIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115690.1,vpu,6054,6299,forward,0.5848837209302327,0,MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL,ATGCAATCTTTAGCAATATTAGCAATAGTAGCATTAGTAGTAGCAGCCATAATAGCAATAGTTGTGTGGACCATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGATAGTGGCAATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115690.1,env,6217,8799,forward,0.6138702460850114,31,MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ,ATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCTACAGACCCCAACCCACAAGAAATAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTACATTGCACTAAGTTGGAGATTAATAGCACTAAGAAGACTAATAGCACTAATAATGGTACTAACATCAATGCCACTGATGATAGTTGGGGGGAAATGAAAAACTGCTCTTTCAATACCACTGCAAGCATAAGAGATAAGGTACAGAGAGAATTTGCGCTTTTTTATAAACTTGATATAGTACCAATAGATAATGATGATATCAACTATAGGTTAATAAGTTGTAACACCTCAGTCCTTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAAAATTTCAATGGAACAGGACAATGTAAAAATGTCAGCACAGTGCAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTCAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAAATCTGAAAATATCACAGACAATACTAAAACTATAATAGTACAGCTGAATGCATCTGTAGCAATTGTTTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGGCCAGGGAGAGCGTTTTATGCAGCAGGAGATATAATAGGAGACATAAGACGAGCACACTGTATCCTTAACAAAACAACATGGGATAACACAATAGAACAGGTAGCTAAAAAATTAAGAGAACAATTTGAGAATAAGACAATAGTCTTTAGTGAATCCTCGGGAGGGGACCCAGAAATTACAATGATTAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAGTACAACTGTTTAATAGTACTTGGCATAATAATGGGAGTAGTACTACAGGGTCAAGTAGCAGTGAAGGCAATATCACACTCCCATGCAAAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACCAATTAGCTGCGAGTCAAATATTACAGGGTTGCTACTAACAAGAGATGGTGGGAATGACGCTAACGGGAACAACACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGCGAAGTGAATTATATAGATACAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCACAGAGAAGAGTGGTACAGAGAGAAAAAAGAGCAGTGGGTCTCGGAGCCTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGCTTTGGGGTTGCTCTGGAAAACTCATCTGCAACACTGCTGTGCCTTGGAATACTAGTTGGAGTAACAAATCTCTGGATGATATTTGGCATAACATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAACATAATATACAGCTTAATTGAGGCATCGCAAACCCAGCAAGAAAAGAATGAACAAGAATTACTAGAATTAGACAAATGGGCAAGTCTGTGGAATTGGTTTAGCATATCAAACTGGCTGTGGTACATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTATACTTTCTATAGTGAATAGAGTTAGGAAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAGTGCTGTTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGAGTTATAGAAGGATTGCGCAGAGCTTTTAGAGCTATTATCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGAGCTTTGCAATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115690.1,tat_exon2,8381,8473,forward,0.5806451612903225,0,RPTSQPRGDPTGPKEPETKVESKTETDPLT,AGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115690.1,rev_exon2,8382,8657,forward,0.4623655913978495,0,DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE,GACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115690.1,nef,8801,9434,forward,0.5274038461538462,0,MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN,ATGGGTGGTAAATGGTCAAAATGTAGTATAGTTGGATGGCCTACTGTAAGGGAAAGAATAAGACGAGCAGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAAGCATGGAGCAATCACAAGTAGCAATGCTAACAATGCTGATTGTACCTGGCTGGAAGCCCAAAAAGAAGAGGAGGAGGTAGGCTTTCCAGTCAGGCCTCAGGTACCCTTAAGACCAATGACTTACAAGGCAGCCTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATAATTTACTCCCAAAAAAGACAAGATATTCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACTAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGAGAGGGTAGAAGAGGAGAATAAAAGAGAGAACCGCTGCTTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGCTACAGTGGAGGTTTGACAGCCGCCTAGCCTTTCACCACGTAGCCAGAGAGCTGCATCCGGAGTACTATAAGAACTAGAACTGCTGACATCTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115571.1,gag,579,2078,forward,0.2675944333996021,0,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115571.1,pol,1871,4882,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115571.1,vif,4827,5405,forward,0.37823834196891193,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115571.1,vpr,5345,5635,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115571.1,tat_exon1,5616,5830,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115571.1,rev_exon1,5755,5830,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115571.1,vpu,5847,6092,forward,0.6931034482758622,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115571.1,env,6010,8550,forward,0.5251716247139588,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115571.1,tat_exon2,8132,8224,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115571.1,rev_exon2,8133,8408,forward,0.423913043478261,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115571.1,nef,8552,9166,forward,0.5478260869565217,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115514.1,gag,584,2083,forward,0.2675944333996021,0,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115514.1,pol,1876,4887,forward,0.14811133200795235,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115514.1,vif,4832,5410,forward,0.37823834196891193,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115514.1,vpr,5350,5640,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115514.1,tat_exon1,5621,5835,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115514.1,rev_exon1,5760,5835,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115514.1,vpu,5852,6097,forward,0.6931034482758622,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115514.1,env,6015,8555,forward,0.528604118993135,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115514.1,tat_exon2,8137,8229,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115514.1,rev_exon2,8138,8413,forward,0.423913043478261,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115514.1,nef,8557,9171,forward,0.5478260869565217,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115488.1,gag,707,2206,forward,0.2675944333996021,0,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115488.1,pol,1999,5010,forward,0.14811133200795235,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115488.1,vif,4955,5533,forward,0.37823834196891193,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115488.1,vpr,5473,5763,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115488.1,tat_exon1,5744,5958,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115488.1,rev_exon1,5883,5958,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115488.1,vpu,5975,6220,forward,0.6931034482758622,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115488.1,env,6138,8678,forward,0.528604118993135,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115488.1,tat_exon2,8260,8352,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115488.1,rev_exon2,8261,8536,forward,0.423913043478261,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115488.1,nef,8680,9294,forward,0.5478260869565217,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115030.1,gag,176,1684,forward,0.2328685258964145,0,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115030.1,pol,1477,4488,forward,0.22266401590457252,0,FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115030.1,vif,4433,5011,forward,0.3969072164948453,0,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115030.1,vpr,4951,5241,forward,0.34375,0,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGACATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115030.1,tat_exon1,5222,5436,forward,0.5945945945945945,0,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEX,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115030.1,rev_exon1,5361,5436,forward,0.5925925925925926,0,MAGRSGDSDEELLTAVRIIKRLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115030.1,vpu,5453,5698,forward,0.8160919540229887,0,MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM,ATGCATATCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115030.1,env,5616,8216,forward,0.6431111111111119,0,MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,ATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAGAAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAATACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGAAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAACCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115030.1,tat_exon2,7798,7890,forward,0.5806451612903225,0,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115030.1,rev_exon2,7799,8074,forward,0.423913043478261,0,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115030.1,nef,8218,8859,forward,0.7375565610859729,0,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATACTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115498.1,gag,663,2162,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115498.1,pol,1955,4966,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115498.1,vif,4911,5489,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115498.1,vpr,5429,5719,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115498.1,tat_exon1,5700,5914,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115498.1,rev_exon1,5839,5914,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115498.1,vpu,5931,6176,forward,0.727586206896552,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115498.1,env,6094,8634,forward,0.5241695303550973,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115498.1,tat_exon2,8216,8308,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115498.1,rev_exon2,8217,8492,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115498.1,nef,8636,9250,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115211.1,gag,250,1752,forward,0.20813492063492078,0,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACACAGGACATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCATTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAGCCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAAACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAAATGCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115211.1,pol,1545,4556,forward,0.2109018830525271,0,FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAGAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCTGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATTTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTACCACCTGTAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGGCAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGTAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115211.1,vif,4501,5082,forward,0.4215384615384614,0,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115211.1,vpr,5022,5312,forward,0.38144329896907214,0,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAACTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGGGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115211.1,tat_exon1,5293,5507,forward,0.547945205479452,0,MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKX,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115211.1,rev_exon1,5432,5507,forward,0.7037037037037037,0,MAGRSGDSDEELLRITRTIKFLYQNX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115211.1,vpu,5524,5793,forward,0.847826086956522,0,MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL,ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTATTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAA,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115211.1,env,5687,8197,forward,1.2241972477064227,757,MHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACCCAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTACTATAAACAATACCAGTAGTATAGAAGAAGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCGCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTAGGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACACTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGTAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115211.1,tat_exon2,7779,7871,forward,0.4838709677419355,0,RPSSQPRGDPTGPKESEKKVERETETDPVT,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115211.1,rev_exon2,7780,8055,forward,0.4565217391304348,0,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115211.1,nef,8199,8822,forward,0.6435406698564594,0,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACCCAGATAAAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115158.1,gag,316,1818,forward,0.3747011952191237,0,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACCCAGGAAATAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACGATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAACCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA,234,1730,MGARASVLSGGELDRWEKIRLRPGGKKTYKLKHIVWASRELERFALNPGLLETSEGCRQILGQLQPALQTGSEELRSLFNAVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKVQQAADTGNNNQVSQNYPIVQNAQGHMIHQPISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGCTPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIQWMTSNPPVPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKVLRAEQATQDVKNWMTETLLVQNSNPDCKTILKALGPQATLEEMMTACQGVGGPGHKARVLAEAMSQATASNVIMMQKGNYRGQRKIVKCFNCGREGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSSKGRPGNFLQNRPEPTAPPAESLGFGEETTPSPKQEQKEGLYPPLASLRSLFGNDP*SQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAACATATAAATTGAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCTCTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATTTAATGCAGTAGCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGATAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCTGACACAGGAAATAACAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACGCCCAGGGGCACATGATACATCAGCCTATTTCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAAAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGATGCACCCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGCTAAAAGACACCATTAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACTACCAGTACCCTTCAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGATATCTATAAAAGATGGATCATCCTAGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGAACCCTTTAGAGATTATGTGGATAGGTTCTTTAAAGTCCTAAGAGCCGAGCAAGCTACACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCACAGGCTACACTAGAAGAAATGATGACAGCATGCCAAGGAGTGGGAGGGCCCGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGCAACAGCTTCAAATGTCATAATGATGCAGAAAGGCAATTATAGGGGCCAGAGAAAGATTGTCAAGTGTTTCAATTGTGGCAGAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAGTGTGGAAAAGAAGGACACCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAAAATTTGGCCTTCCAGCAAGGGGAGGCCAGGGAATTTTCTCCAGAACAGGCCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTGGGGTTTGGAGAGGAGACAACCCCCTCTCCGAAGCAGGAACAGAAAGAGGGACTGTATCCTCCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTAGTCACAGTAA -MK115158.1,pol,1611,4622,forward,0.27623762376237626,0,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTCTACCAGGAAGATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATGGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTGCATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCTCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACTTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAGGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA,1526,4534,FFRENLAFQQGEAREFSPEQARANSPASRELGVWRGDNPLSEAGTERGTVSSLSFPQITLWQRPLVTVKVGGQMKEALLDTGADDTVLEDINLPGKWKPKMIGGIGGFIKVRQYDQIIIEICGKKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLYEEFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLQEKDNWTVNDIQKLVGKLNWASQIYPGIKVRQLCRLLRGTKALTDIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARSRGAHTNDVRQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTNKGRQKVVSLTDTTNQKTELQAILLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLSWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESINKELKKIIGQIRDQAEHLKTAVQMAVFIHNFKRRGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED*,TTTTTTAGGGAAAATTTGGCCTTCCAGCAAGGGGAGGCCAGGGAATTTTCTCCAGAACAGGCCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTGGGGTTTGGAGAGGAGACAACCCCCTCTCCGAAGCAGGAACAGAAAGAGGGACTGTATCCTCCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTAGTCACAGTAAAAGTAGGGGGACAGATGAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATAATCATAGAAATTTGTGGAAAGAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTGAAGCCAGGAATGGATGGCCCAAAGGTCAAACAATGGCCATTAACGGAAGAAAAAATAAAAGCATTAATAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACAAAATGGAGAAAATTAGTAGATTTTAGAGAACTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGATTAAAAAAGAAAAAATCAGTAACAGTATTAGATGTGGGAGATGCATATTTTTCAGTTCCCTTATATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAAAGCAGCATGACGAAAATCTTAGAGCCTTTTAGAAAACAGAATCCAGACATAGTGATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTACTGAAATGGGGGCTTACTACACCAGACAAAAAACATCAGAAAGAACCTCCCTTCCTTTGGATGGGTTATGAACTACATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCAAGAAAAGGACAACTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGACAATTATGCAGACTCCTTAGGGGAACCAAGGCACTAACAGACATAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGGGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATCTATCAGGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGGTCAAGGGGTGCCCACACTAATGATGTAAGACAGTTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGGAAAACTCCTAAATTTAGACTACCCATACAAAGAGAGACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCCCCTCTAGTAAAGTTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAAAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACCAATCAGAAGACTGAGTTACAAGCAATCCTTCTAGCTTTACAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAATTAGTCAATCAAATAATAGAGCAATTAATAAACAAGGAAAAGGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGGATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAATAATTGGAGGGCAATGGCCAGTGATTTTAACATCCCACCTGTGGTAGCAAAAGAGATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTGGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAGGAATTAAAGAAAATTATAGGACAGATAAGAGATCAGGCTGAACATCTTAAGACAGCAGTGCAAATGGCAGTATTCATCCACAATTTTAAAAGAAGAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCAGTGGTCATACAAGACAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATAATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAG -MK115158.1,vif,4567,5148,forward,0.5815384615384613,0,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGTTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACGGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG,4479,5060,MENRWQVMIVWQVDRMRISKWKSLVKYHIHTSKKAKKWFYRHHFESRHPKISSKVHIPLEEENKLVVTTYWGLNTGERDWHLGQGVSIEWRQGKYRTQIDPGLADQLIHIYYFDCFSESAIRKAILGHRVSPRCNYQAGHNKVGSLQYLALTALIATKKAKPPLPSVQKLVXDRWNKPQKTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGCAAATGGAAAAGTTTAGTTAAATACCATATACATACTTCAAAGAAAGCCAAAAAATGGTTCTATCGACATCACTTTGAAAGCAGGCATCCAAAAATAAGCTCAAAAGTACACATCCCWCTAGAGGAAGAAAATAAATTAGTAGTAACAACATATTGGGGTCTGAATACAGGGGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGCAGGGGAAGTATAGGACACAAATAGACCCTGGCCTGGCAGACCAACTGATTCATATATATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAAAGCCATATTAGGACATAGAGTTAGCCCTAGGTGTAACTATCAAGCAGGACATAACAAGGTAGGATCTCTACAATATTTGGCACTAACAGCATTAATAGCTACAAAGAAGGCAAAGCCGCCTTTGCCTAGTGTCCAGAAATTAGTARRAGACAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -MK115158.1,vpr,5088,5378,forward,0.46875,0,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTGGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5000,5290,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYNTYGDTWEGVEAIIRMLQHLLFIHFRIGCNHSKIGIIRQRRTRNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAAAATGAAGCTGTCAGACATTTTCCTAGGCCATGGCTCCATGGCTTAGGACAACATATCTACAACACCTATGGGGATACTTGGGAGGGAGTTGAAGCTATAATAAGGATGCTGCAACATCTACTGTTTATCCATTTCAGAATTGGGTGCAATCATAGCAAAATAGGCATTATTCGACAGAGAAGAACAAGAAATGGAGCCAGTAGATCCTAG -MK115158.1,tat_exon1,5359,5573,forward,0.5,0,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKX,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACA,5271,5485,MEPVDPRLEPWKHPGSQPRTACTKCYCKRCCFHCQVCFITKGLGISYGRKKRRQRRXPPQSGQTHQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAGGACTGCGTGTACCAAATGTTATTGTAAGAGATGTTGCTTTCATTGCCAAGTTTGCTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAARACCTCCTCAAAGCGGTCAGACTCATCAAGATTCTCTATCAAAGCA -MK115158.1,rev_exon1,5498,5573,forward,0.6923076923076923,0,MAGRSGDSDEELLKITRTIKFLYQNX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACA,5410,5485,MAGRSGDSDEBLLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAARACCTCCTCAAAGCGGTCAGACTCATCAAGATTCTCTATCAAAGCA -MK115158.1,vpu,5590,5859,forward,0.720430107526882,0,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL,ATGCAATCCTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTTGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA,5502,5747,MQSLEILAIVALVVAAILAIVVWSIVLIEYRKILRERKVYKLIDRIRERAEDSGNESEGDQEELSAMVERGHLAPWDINDL*,ATGCAATCTTTAGAGATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGAGAAAGAAAAGTATATAAATTAATTGACAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGAGATCAAGAAGAATTATCAGCAATGGTGGAAAGGGGGCATCTTGCTCCTTGGGATATTAATGATCTGTAA -MK115158.1,env,5753,8314,forward,0.6398200224971886,0,MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTGAAAATGTTAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGGGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTATATCTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA,5665,8211,MRAREIKKNYQQWWKGGILLLGILMICNAEKSEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEMLLKNVTENFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCNNTVTTNASMNNSGEMKNCSFNITTQTRGRKREYALFYNLDVVQLESDKTSTSYRLISCNTSVITQACPKISFEPIPIHYCAPAGFAILKCNDKQFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEEDIIIRSQNISDNAKSIIVQLNESISINCIRPGNNTRKSIHMGPGKVFYATGDIIGNIRQAHCNISKAKWNNTLRQIARKLGEQFNNKTIVFNQSSGGDPEIVMHTFNCGGEFFYCNTTSLFNSTWENDTNITEESNSSDDTITLQCKIKQIINLWQEVGKAMYAPPISGYINCSSNITGLILVRDGGNNRTSESETFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQRQKRAVGFGALFLGFLGAAGSTMGAASVALTVQARLLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQIMGIWGCSGKYICTTAVPWNTSWSNKSYDQIWKNMTWMQWEKEIDNYTSEIYSLIALSQDQQEKNEQELLELDKWASLWNWFDISNWLWYIKIFIMIVGGLVGLRIVFAILSIVNRVRQGYSPLSFQTHHPAPRGPDRPGGIEEEGGERDRDRSGRSASGFLTLIWIDLRSLCLFSYHHLRDLLLIAARIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNTTAIVVAEGTDRIIEALQSAGRAVLHIPRRIRQGLERALL*,ATGAGAGCGAGGGAGATCAAGAAGAATTATCAGCAATGGTGGAAAGGGGGCATCTTGCTCCTTGGGATATTAATGATCTGTAATGCTGAAAAGTCTGAAAAGTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAGGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGTTATTGAAAAATGTGACAGAAAATTTTAACATGTGGAAAAACAACATGGTAGATCAGATGCATGAAGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACACCACTTTGTGTTACCTTAAATTGCAATAATACTGTCACCACTAATGCTAGCATGAATAATAGTGGAGAAATGAAAAATTGCTCTTTCAATATCACCACCCAAACGAGAGGGAGAAAGAGAGAATATGCACTTTTTTATAACCTTGATGTAGTGCAATTAGAATCAGACAAAACTAGTACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCTTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGCAGTTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAATTGCTGTTAAATGGCAGCCTAGCAGAAGAAGATATAATAATCAGGTCTCAAAATATCTCAGATAATGCAAAAAGCATAATAGTACAGTTGAATGAATCTATAAGCATTAATTGTATAAGACCCGGCAATAATACAAGAAAAAGTATACATATGGGACCAGGCAAGGTATTTTATGCAACAGGAGATATAATAGGAAATATAAGACAAGCACATTGTAACATTAGTAAAGCAAAATGGAATAACACTTTAAGACAGATAGCCAGAAAATTAGGAGAACAATTTAACAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGGGACCCAGAAATTGTAATGCATACTTTTAACTGTGGAGGGGAATTTTTCTACTGTAATACAACATCACTGTTTAATAGTACATGGGAGAATGATACAAATATTACTGAAGAATCAAATAGCTCAGATGACACAATCACACTCCAATGCAAAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGATACATTAACTGTTCATCAAATATCACAGGGCTGATATTAGTAAGAGATGGTGGTAATAACAGAACAAGTGAGAGTGAGACCTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGACAAAAAAGAGCAGTGGGATTTGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGGCGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGATCATGGGGATTTGGGGTTGCTCTGGAAAATACATCTGCACCACTGCTGTGCCTTGGAATACTAGCTGGAGTAATAAATCTTATGATCAGATTTGGAAGAACATGACCTGGATGCAGTGGGAAAAAGAAATTGATAATTACACAAGTGAAATATACAGCTTAATTGCACTATCGCAAGACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGACAAATGGGCAAGCTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGATTAAGAATAGTTTTTGCAATACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTCCAGACCCACCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCGATCGGCGAGCGGATTCTTAACACTTATCTGGATCGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATACTGGTGGAACCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCCATTAGCTTGCTTAATACCACAGCAATAGTAGTAGCTGAGGGGACAGATAGAATTATAGAAGCTTTGCAAAGTGCTGGTAGAGCTGTTCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTACTATAA -MK115158.1,tat_exon2,7896,7988,forward,0.6774193548387097,0,RPSSQPRGDPTGPKESEKKVERETETDPVT,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG,7793,7885,RPTTQLRGDPTGPEESKKKVERETETDPVDR,AGACCCACCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCGATCGG -MK115158.1,rev_exon2,7897,8172,forward,0.5217391304347827,0,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG,7794,8069,DPPPSSEGTRQARRNRRRRWRERQRQIRSIGERILNTYLDRPTEPVPLQLPPLERLTLDCSEDCGTSGTQGVGTPQILVEPPTVLESGTKE*,GACCCACCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCGATCGGCGAGCGGATTCTTAACACTTATCTGGATCGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATACTGGTGGAACCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115158.1,nef,8316,8939,forward,0.6866028708133971,0,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA,8213,8833,MGNKWSKSSIVGWPTIRERIRRTPPIAEGVGAVSRDLGKHGAITSSNTAANNPDLAWLEAQEGEEVGFPVRPQVPLRPMTYKGAFDLSFFLKEKGGLEGLIYSRKRQEILDLWVYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVDPEEVEKANEGENNCLLHPMSQHGMEDEDREVLMWKFDRHLASKHVARELHPEYYKDC*,ATGGGAAACAAGTGGTCAAAAAGTAGTATAGTTGGATGGCCTACTATAAGGGAAAGAATAAGACGAACCCCTCCAATAGCAGAAGGGGTGGGAGCAGTCTCTCGAGACCTAGGAAAGCATGGAGCAATCACAAGTAGCAACACAGCAGCTAATAATCCTGACTTGGCCTGGCTGGAAGCACAGGAGGGTGAGGAAGTAGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTTTCGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATTCCAGGAAAAGACAAGAGATCCTTGATCTATGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGGTATCCATTGACCTTTGGGTGGTGCTTCAAGCTAGTACCAGTTGACCCAGAGGAGGTAGAAAAGGCCAATGAAGGAGAAAACAACTGCTTGCTACACCCCATGAGCCAACATGGAATGGAGGATGAAGACAGAGAAGTACTGATGTGGAAGTTTGACAGACACCTAGCATCTAAGCACGTAGCCCGAGAGCTACATCCGGAGTATTACAAGGACTGCTGA -MK114705.1,gag,532,2046,forward,0.23247524752475246,0,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGCGAATTAGATAGATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCGGTTAATCCTGGCCTGTTAGAAACATCAGAGGGCTGTAGGCAAATACTGGGACAGCTACAACCGTCCCTTCAAACAGGATCAGAAGAACTTAAATCATTATTTAATACAATAGCAGTCCTTTATTGCGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTCTAAATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGTCAGAGCAGTCAAGTCAGCCAAAATTACCCTATAGTGCAGAACCATCAGGGGCAAATGGTATATCAGGCTCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCCGAGGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAGGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGGACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCATGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTATCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCTTTTAGAGATTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAAGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTCTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCAGCCACAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGCAACAGGTGGTGCAACTAACATAATGATGCAGAAAGGCAATTTTAGGAACCAAGGAAAACCTATTAAGTGTTTCAATTGTGGCAAAGAAGGGCACCTAGCTAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCTCTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK114705.1,pol,1839,4850,forward,0.17412935323383083,0,FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTAAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAACCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCCGTATTTGCCATAAAGAAAAAGGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAGCTTAATAAAAGAACTCAAGACTTTTGGGAGGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTCTCAGTTCCTTTAGATGAAAGCTTCAGAAAGTATACTGCATTTACCATACCTAGTACTAACAATGAGACACCCGGGATTAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAAAGTAGCATGACGAAAATCTTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTACCAATACATGGATGATTTATACGTAGGATCTGACTTAGAAATAGAGCAGCATAGAGCAAAAGTAGAGAACCTGAGAGAGCATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGCTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAGCTAAGGCACTAACAGAAGTGATAACACTAACAGAAGAAGCAGAGCTAGAATTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAGTAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACCTATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAAGCAGTTCAAAAAATAGCCACAGAGAGCATAGTAGTATGGGGAAAGATTCCTAAATTTAGATTACCCATACAGAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAGTACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGGCAAAAAGTTATCCCCTTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGACAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAGAAGGTCTACCTGACATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCAGGAATCAGGAAAATACTATTTTTGGATGGAATAGATAAGGCCCAGGAAGATCATGAGAAATATCATAGTAATTGGAAAGCAATGGCTAGTGATTTTAACATACCACCTGTGGTAGCAAAAGAGATAATAGCCAGCTGTGATAAATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGGTACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAATAATACATACAGATAATGGTAGCAATTTCACCAGCACTACAGTCAAGGCCGCCTGCTGGTGGGCAGGTGTTAAGCAGGAGTTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGAGTACTGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATCAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK114705.1,vif,4795,5373,forward,0.34375,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTCTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGGTTTACAGACACCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAACACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATATGTACTATTTTGATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTGCAATACCTGGCACTAGCAGTATTAGTAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTCGCGAGACTGACAGAGGATAGATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK114705.1,vpr,5313,5597,forward,0.40312499999999996,0,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS,ATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAGCTTAAGAGGGAAGCTGTTAGACATTTTCCTAGGGAATGGCTCCATAGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGGCAGGAGTAGGAGCCATAATAAGAATACTGCAACAATTACTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGGATAGGCATACTGAGGAGAACAAGAAATGGAGCCCGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK114705.1,tat_exon1,5578,5792,forward,0.655263157894737,0,MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKX,ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK114705.1,rev_exon1,5717,5792,forward,0.8076923076923077,0,MAGRSGDRDEDLLETVRFIKFLYQNX,ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK114705.1,vpu,5809,6054,forward,0.8310344827586209,0,MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL,ATGCAACCTTTAGAGATATCAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTACTCTTAGAGTATAGGAAAATATTAAGGCAAAAGAAAATAGACAGATTAATTAATAGAATAAGTGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK114705.1,env,5972,8548,forward,0.6253647586980922,0,MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL,ATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAGTGCTACAAACATGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGATGCAACCACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGATACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCGACCCACAAGAAGTAGTACTGGAAAATGTGACAGAAAATTATAATATGGGAAAAAATAACATGGTGGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTACTCTTAACCCCATTCTGTGTCACTTTAAATTGCACTGATGCTAACATCACCAGCACTAATAATAGTAGAGATAAGAAGGAAGGAGAAAGTACATTGGAGGAGACGAAAGGAGAAATAAAAAACTGCTCTTTCAATATGACTTCAAGCATGAGCGATAAGTCTCAGAAACAACGTGCACTTTTTTATAAGCTTGATGTGGTACAAATAGATGAGACTAATAATAATAGTTATAGGTTGATAAGTTGTAACACCTCAGTCGTCACACAGGCTTGTCCAAAGGTATCCTTTGATCCAATCCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGAAATTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAACCTGTAGTGTCAACCCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAAGTAATGATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTGCAGCTGAAGACACCTGTACAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGGATAAGTATGGGACCAGGGAGAGTAATTTATGCAACAGGACAAATAATAGGAGATATAAGAAAAGCACATTGCAACATTAGTAGAGCAGAATGGAATACAACTTTAAAGCAGATAGTTACACAATTAAGAAAGCAGTGGAATAGAACCATAATCTTTAACTCATCCTCAGGAGGGGACCCAGAAATTGTGATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACAAAACTATTTAATAGTACTTGGCCACGTAATAGTACTTGGAATAATACTGAAGGGTCAAATGACACTGAAATAATCACACTCCCGTGCAGAATAAAACAAATTGTAAACAGGTGGCAGGAAGTAGGCAAAGCAATGTATGCCCCTCCCATCCAAGGACAAATTAGTTGTTCATCAAATATTACAGGGCTGCTACTAGTTAGAGATGGTGGAATTAACACCAGTGAGAGCAACGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAGGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATGCTGGGAGCTATGTTCCTTGGGCTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGTTGACGGTACAGACCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGACTCCTAGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAACACTAGTTGGAGTAATAGATCTTATGAAGATATTTGGAACAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAGGCTTAATATACACCTTAATTGAAAAATCGCAGAACCAGCAGGAAATAAATGAACAAGAACTATTGTCATTGGATAAGTGGGCAAGCCTGTGGAATTGGTTTAATATAACAAATTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAGTGCTATCAGCTTGCTCAACGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGAATCATAGGAGTAGTACAAAGAACTTGGAGAGCTTTTATCCACATACCTAGGAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK114705.1,tat_exon2,8130,8222,forward,0.9303030303030304,0,RPPAQPQGDPTGPKKSKKEVEKETETDQCD,AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK114705.1,rev_exon2,8131,8406,forward,0.5591397849462365,0,DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE,GACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK114705.1,nef,8550,9117,forward,0.9368932038834918,122,MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK,ATGGGTGGAAAATGGTCAAAAAAGAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAAAATGAAGCGAACTGAGCCAGCAGCAGAGGGGGTGGGAGCAGCATCTCGAGACCTGGACAAATATGGAGCAATCACAAGTAGCAATACAGCACAGACCAATCCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAAGAGGTAGGCTTTCCAGTCAGACCCCAGGTACCTTTGAGACCAATGACTTACAAGGCAGCTGTGGATATGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAGAGACAAGATATCCTTGATCTGTGGATCTATCACACACAAGGCTACTTCCCTGATTGGCAAAATTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGAGGGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCATTTTAGTCAGCGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGTAGAACCAGAGGAGATCTCTCGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK114856.1,gag,492,2021,forward,1.871000000000001,828,MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC,ATAGGTGCGAGAGCGTCAGTATTGAGCAGAGGAGAATTAGATAGATAGGAGAAAATTCAGTTAAGGCCAAGGAGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAAGGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGGAGGCTGTAAACAGATATTAGAACAGCTACAACCATCCCTTCAGACAAGATCAGAAGAACTTAGATCATTATATAATACAGTAGCCACCCTCTATTATGTACATCAAAAGATAGATGTAAAAGACACCAAAGAAGCGTTAGACAAAGTAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCGGCAGCTGACACAAGAAACAGAGGCCAGACCAGTCAAAATTTCCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAGGCCATATCACCTAGAACTTTAAATGCATAAGTAAAAGTAGTAGAAGAAAAAGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACTATGCTAAACACAGTAAGTAGACACCAAGCAGCTATGCAAATGTTAAAAAAGGTCATCAATGAAGAAGCTGCAGAATAAGATAGATTACATCCAGTGCATGCAAGGCCTATTGCACCAGGCCAGATAAGAGAACCAAAAAGAAGTGACATAGCAAGAACTACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAAGAAAGATTTATAAAAGATAAATAATTCTAGGACTAAATAAAATAGTAAAAATGTATAGCCCTACCAGCATTTTAGACATAAAGCAAAGGCCAAAAGAACCCTTTAGAGACTATGTAGACCAGTTCTATAAAACTTTAAGAGCCAAGCAAGCTACACAGAAAGTAAAAAATTAGATGACAAAAACCTTGTTAGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAAAAAGTAAGAAGACCCGGCCATAAAGCAAAAGTTTTAGCTGAAGCAATGAGCCAAGCAACAGGTGCAGCCAACATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAAAGCACATAGCCAAAAATTGCAAGGCCCCTAAGAAAAAAGGCTGTTAGAAATATAGAAAAGAAAGACACCAAATGAAAAATTGCACTAAGAGACAGGCTAATTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK114856.1,pol,1787,4825,forward,0.5422287390029328,1998,ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI,TTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAAGAGAGCAACTAAAGAAAGCTTTATTAAATACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAAGAAAATAGAAACCAAAAATGATAAGAAGAATTAGAAGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCAGACACAAAGCTATAAGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTAGAAGAAATCTGTTGACTCAGCTTAGTTGCACTCTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAAGAATGGACGGCCCAAAAGTTAAACAATAGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATAGAAAAAGAAAAGAAAATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATAGAAAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGATTTCTAAGAAATTCAATTAAGTATACCACATCCTGCAAAGCTAAAAAAGAAAAAATCAGTCACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAAGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAAAGATTAGATATCAGTATAATGTGCTTCCACAAAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTAGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAAGGCAACATAGAACAAAAGTAAAGGAACTGAGGCAACATCTAATGAGGTAAAGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTAGATGAGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAAGAAAGTTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAATTGAATTAGGCAAGTCAGATTTATGCAAAGATTAAAGTGAAGCAATTATGTAAGCTCCTTAAAAGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAAAGATTCTAAAAGATCCAGTACATAGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAAAGAGAAGGTCAGTGGACATATCAAATTTATCAAAAGCCATTTAAAAATCTAAAAACAGAGAAATATGCAAGAACGAAAGGTGCCCATACTAATGATGTAAAGCAATTAACAGAAGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATAAAGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATAAGAAACATAGTGGACAGATTATTGGCAAGCCACCTAGATCCCTAAGTAAGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATAGTACCAGTTAGAAAAAGAACCCATAATAAGAGCAGAAACCTTCTATGTAGATAAGGCAGCTAATAAAGATAATAAATCAAGAAAAGCAAGATATGTTACTGACAGAAGAAGACAAAAAGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAAGATTCAGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAAAGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGAAAAAAGTCTACCTGGCATAAGTGCCAGCCCACAAAAGAATTAAAAGAAATGAACAGGTAAATAAACTAGTCAGTGCTAGAATCAAGAAAGTACTATTTTTAGATAAAATAGAAAAAGCCCAAGAAGACCATAAAAAATATCACAGTAATTAAAGAACAATGGCTAGTAATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAAAAGAAGCTATGCATAGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAAGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAAGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTAGTAGGCAAAAATCAAGCAAGAATTTAGTATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAAAATCTATAAATAATGAATTAAAGAAAATTATAAGACAAGTAAAAGATCAGGCTAAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGAAGGATACAGTGCAGAGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCAGGTTTATTACAAGGACAGCAGAGATCCACTTTAGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAAAGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAAGGATTATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK114856.1,vif,4770,5348,forward,1.928125,328,MIVWQVDRMKIRTWKSLVKYHMYISKKAKK,ATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAGAAATAGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGGTGCTAGATTAGTAATAACAACATATTAAGGTCTGCATACAGGAGAAAAAGACTGGCATTTAGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK114856.1,vpr,5288,5578,forward,1.7061855670103092,143,MLFIHFRIKCHHSRIGIVLQRRARNRASRS,ATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAAACCTAAGACAATATATCTATGAAACTTATAAAGATACTTGGACAAGAGTAGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK114856.1,tat_exon1,5559,5773,forward,0.5,123,MKLIKILGQE,ATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK114856.1,rev_exon1,5698,5773,forward,0.5769230769230769,0,MAGRSGDRDEDLLKTVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK114856.1,vpu,5790,6038,forward,1.7719512195121947,122,MQPLKILAIVALVVAAIIAIVV,ATGCAACCTTTAAAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGACCATAGTAGGCATAAAATATAAGAAAATATTAAGACAAAGAAAAATAGATAGAATAATTAATAGAATAAGAAAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK114856.1,env,5953,8520,forward,1.890116279069764,1453,MTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD,ATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTAAGTCACAGTCTATTATGAGGTACCTGTGTAAAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTAGAAAAATAACATGGTAGACCAGATGCATGAGGATATAATCAATTTATGAGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAAAAATAATACTGTAGGAAATCAAACAAATTATCATCTCAATGAAACTAATACAATACAAAGAAAAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAATATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAAAGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTAGTTTTGCAATTCTAAAGTGTAAAGATGAGATGTTCAATAGAACAAGACCATGTAAGAATGTCAGCACAGTACAATGTACACATAGAATTAGACCAGTAGTGTCAACTCAACTGCTGTTAAATAGTAGCCTAGCAGAAAAAAAGATAGTACTTAGATCTGAAAATTTCACAGACAATACTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAAAGAGAGCAATTTATGCAACAAGACAGATAATAGAAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGAAGTGACACTTTAAGCAAAATAGTTGAAAAATTAAAGGAAAAATTTAGAAAAAATAAAACAATAATCTTTAAGCAATCATCAAGAGAGGACATAGAAATTGAAACGCACAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGAAGTGTTAATAGAACTAGCATAAACAGAACTAACAATAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATCAACAGGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCTATCAGTAAGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATAGTAGTACAACTAATAGTAAAGAAGAGACCTTCAGACCTAGAGAAAGAAATATGAAGGACAATTAGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAAAAGTAGCACCCACCAAGGCACAAAGAAAAGTAGTGCAGAGAGAAAAAAGAGCAATAAGAACGTTAGGAGCTATGTTCCTCAGGTTCTTAAGAACAGCAGGAAGCACTATAGGCGCAGCGTCACTGACGCTGACAGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTAAGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTAGAAAGATACCTACAAGATCAACAGCTCCTGAAGATTTGAAGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATACTAGTTAGAGCAATAAATCTTACAGTACCATCTAAGATAACATGACCTAGATGCAGTAGGACAGAGAAATTCAAAATTACACAAAGATAATATACAACTTACTTAAAGAATCGCAAATCCAACAGAAAAAGAATGAAAAAGAATTATTAGAACTAGATCAATGAGCAAATTTGTAGAATTAGTTTAGTATAACAAAATGGCTATAGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAAGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAAAGAACAGATAAAGTTATAGAAGTAAGACAAAAAATTAGCAGAGCTTTTCTCCACATACCTAGAAAGATAAGACAAGGCTTAGAAAAGGCTTTGCAATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK114856.1,tat_exon2,8102,8194,forward,0.7741935483870968,37,RPSSQPQEDQTGPKE,AGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK114856.1,rev_exon2,8103,8378,forward,0.8478260869565217,69,ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE,GACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK114856.1,nef,8522,9175,forward,1.9499999999999997,403,MSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,ATAAGTGGCAAGTAGTCAAAAAGTTGTATGGCTAGATAGCCTGCTGTAAAAGAAAGAATAGAAAGAGTTAATCCAAGGCCTGCTGCAAAGAAAGAACAAGCTGAGCCAGCAGCAGCTAAGGTAAGAGCAGCATCTCGAGACTTAGAAAAATATAGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTAGCTAGAAGCACAAGAGGAAGAAGAAGTAGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTAAATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTAGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGAAATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATAGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115009.1,gag,302,1820,forward,1.8444000000000005,844,MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC,ATGGGTGCTAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGAAGGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATAGTACATCAACCATTATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAAGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATAGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAAGAACCAAGAGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAAGGCTAAACAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAAGAACCCTTTAAAGATTATGTAGACCGATTCTATAAAACTCTAAAGGCTGAGCAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAAGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATAGTGCAAGGAGGCAATTTTAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAAGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAAAAATGTAGAAAGGAAAGACACCAAATGAAAGATTGTACTAAGAGACCAGACTAAGACGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115009.1,pol,1613,4624,forward,0.4463220675944335,1728,AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED,TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATGATAAAAAGAATTAGAAGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACGCCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGTCAGGAATAGATAGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAGGAAAAGAAAATTACAAAAATTAGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAAAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTAAGAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGAAGTTAAATATCAGTACAATGTGCTTCCACAGAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAGGTGAGGACTCACCACACCAGACAAGAAACATCAGAAAAAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTGATAAATAGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTGAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGAGGAGAGACCAATAGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGAAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAGAAAATGTGGTGGACAAAGTATTGGCAAGCCACCTAGATTCCTGAGTAAGAATTTGTCAATACCCCTCCCTTAGTAAAACTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAAATAAGGCAGCTAATAGAAAGACTAAATTAAGAAAAGCAGAGTATGTTACGGACAGAAGAAGACAAAAGGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAGATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTGAAGGAAATGAACAAGTAGATAAATTAGTCAGTAATAGAATCAGAAGAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAGAGAGCAATGGCTAGTAATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATAGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATAGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAAGCCGCCTGTTAGTAGGCAGAGATCAAGCAGGAATTTAGTATTCCCTACAATCCTCAAAGTCAAAGAGTAGTAAAATCTATGAATAATAAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCATAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGAGATTATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115009.1,vif,4569,5147,forward,1.990212765957447,166,MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED,ATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATAGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATAGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGAGGAAGCAAGATTGGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115009.1,vpr,5087,5377,forward,1.2802083333333347,88,MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT,ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115009.1,tat_exon1,5358,5572,forward,0.6712328767123288,35,MTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEX,ATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115009.1,rev_exon1,5497,5572,forward,0.7037037037037037,0,MAGRSRDSDEELLTAVRIIKRLYQSX,ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115009.1,vpu,5589,5834,forward,1.0363636363636366,0,MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP,ATGCATGCCTTAGAAATAGCAGCAATAGCAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115009.1,env,5752,8352,forward,1.8788617886178816,1437,MKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD,ATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATAAAGTACCTGTATAGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTAGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTAGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAACTTGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATGAAAGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTAATGTAGTACCAATAGATGAAGATAGTAAAAATACTACGGGCAAATATAAGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTAAACGGCAGTCTAGCAGAAGAAAAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATAGAACAGACATAATAAGAGATATAAGACAAGCGCATTATAACATTAGTAAGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGAAGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAGTACTTAGAATAGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAAGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTAGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTAGAGGAAGAGATATGAAGGACAATTAGAGAAGTAAATTATATAAATATAAAGTAGTCAAAATTAAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAAGGAAAAAAGAGCAATAAGACTTGAAGCTTTCTTCCTTAGGTTCTTAAGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACAGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAAAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTAAGGATTTAAAGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAAGAAAAAGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTAGGCAAGTTTGTAAAATTAGTTTGACATTACACAGTAGCTATAGTATATAAAAATATTCATAATAATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGAGGACAGATAAAATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115009.1,tat_exon2,7934,8026,forward,0.7741935483870968,0,RPSSQPREDPTGPKEQKKEVERKTEAHPRD,AGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115009.1,rev_exon2,7935,8210,forward,0.7826086956521738,70,ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE,GACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115009.1,nef,8354,8995,forward,1.7004854368932047,395,MTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR,ATAGGTAACAAGTTGTCAAGAAGGCTCAAGGCTAGATGGCCTGCCATAAAGGAGAAAATAAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAACAGCTAAGGTAAGAGCAGCATCTCGAGACCTGAAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGAAGAAAGAAGAGGTAAGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGAGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAAGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATAGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAAGTAGAAGAGGCCAGTGTAAGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATAGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115387.1,gag,292,1794,forward,0.21157684630738527,0,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ,ATGGGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAGGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACACAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCGCAGGATGTAAAAAATTGGATGACAGAAACCTTATTGGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115387.1,pol,1587,4598,forward,0.13147410358565748,0,FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAGGGAAGATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAGAAATCAATAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTGGGGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAGACAATTATGTAAACTCCTTAAGGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGGGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATGGGATACCTGGTGGACAGAATATTGGCAAGCCACCTGGATTCCCGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAAAAAGAACCTATTGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAGGAATAATTCAAGCACAACCAGATAGGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAGGGTCTACCTTGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTCCTATTTTTGGATGGAATAGATAAGGCCCAAGAGGAGCATGAGAAATATCACAATAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAGGGAGAAGCCACGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTGGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115387.1,vif,4543,5121,forward,0.32164948453608244,0,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAGGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGCAGGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115387.1,vpr,5061,5351,forward,0.21875,0,MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115387.1,tat_exon1,5332,5546,forward,0.375,0,MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKX,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115387.1,rev_exon1,5471,5546,forward,0.46153846153846145,0,MAGRSGDSDEDLLKTVRLIKYLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115387.1,vpu,5563,5808,forward,0.7764705882352942,0,MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL,ATGCAATCCTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAGGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115387.1,env,5726,8287,forward,0.6926339285714282,0,MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL,ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAGCATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGACTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAGATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGCTGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGCATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAAAGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAAGAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115387.1,tat_exon2,7869,7961,forward,0.7741935483870968,32,RPSSQLRGEPTGPKE,AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115387.1,rev_exon2,7870,8145,forward,0.2934782608695652,0,DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE,GACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115387.1,nef,8289,8939,forward,0.534862385321101,0,MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC,ATGGGTGGCAAGTGGTCAAAAAGTAGTAGGGTTGGATGGAATGCAGTGAGGGAAAGAATGAGACGAGCTCAGCCAACAGCAGATAGGGAACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAATATGGAGCACTTACAAGTAGGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGATGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATGGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGGTTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115491.1,gag,521,2020,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115491.1,pol,1813,4824,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115491.1,vif,4769,5347,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115491.1,vpr,5287,5577,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115491.1,tat_exon1,5558,5772,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115491.1,rev_exon1,5697,5772,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115491.1,vpu,5789,6034,forward,0.727586206896552,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115491.1,env,5952,8492,forward,0.5241695303550973,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115491.1,tat_exon2,8074,8166,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115491.1,rev_exon2,8075,8350,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115491.1,nef,8494,9108,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK116110.1,gag,117,1600,forward,1.730938123752496,973,MSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ,GCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTTGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACATCCTGTGAAGCTTGCTCGGCTCTTAGGGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTTTTATGTCCAAAATGCTGACAGGACTATACATTCTTACTATTTTATTTAATCCCAGGATTACCCATCTTTTATAGATATCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATATCACTTCCCCTTGGTTCTCTCATTTGGCCTGGTGCAACAGGCCCTGCATGCACTGGATGCAATCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAATATTTGCATTGCTGCTTGATGTCCCCCCACTGTATTTAGCATGGTGTTTATATCTTGTGGGGTGGCTCCTTCTGCTAATGCTGAAAACATAGGTATTACTTCTGGGCTAAAAGCCTTTTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGACATGGCCTGATGTACCATTTGCCCCTGGAGGTTTTGCACTATAGGGTAGTTTTGGCTGACCTGGCTGTTATTTCCTGCGCCAGCTGCTGCTTGCTGTGCTTTCATCTTGCTTTTGTTTTGCTCTTCCTCTATCTTATCTAGCGCTCCCTTGGTGTCTTGTATCTCTATCCTTTGATGTATACAATAGAGGACCGCTACTGTATTATATAATGATTTAAGCTCTTCTGACCCTGTTTGGAGGGATGGCTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTAGATGTTTTAACCTATATTTTTTCTTTCCTCCTGGCCTTAACCGAATTTTTTCCCATTGGTCTAATTTTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGGTAAAAACTTTTTTGGCGTACTCACCAGTCGCCGAAGCAATGAGCCAAGTAAATTCAACTACCGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAAGAAGACTGTTAAGTGTTTCAACTGTGGTAAAATAGGGCATATAGCAAAAAATTGCAGGGCCCCCAGGAGAAAGGGCTGTTGGAAATGTGGACAGGAAGGACACCAGATGAAAGATTGTAGTGAGAGACAGGCTAATTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAA,140,1642,MGARASVLSGGQLDRWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIAVLYCVHQKIEVKDTKEALEKIEEEQNKSKKKAQQAAANTENSSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWRCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQETIDKELYPLTALKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATAGATGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTATTGGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAAGCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTAACACAGAAAACAGCAGCCAGGTTAGCCAAAATTACCCTATAGTGCAAAATATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTGGGAGAAATTTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCCGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGTCCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTGACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCCGAAATTGCAGGGCCCCTAGGAAGAAGGGCTGTTGGAGATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK116110.1,pol,1393,4404,forward,0.20376984126984143,0,FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED,TTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAAAAGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGTGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACCCATAGAAATCTATGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGGTGCACTTTAAATTTTCCCATTAGTCCTATCGAAACTGTACCAGTAAGATTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAGATTTCAAAGATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTGGGAATACCGCATCCCGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCATTAGATAAAGACTTTAGGAAGTATACTGCATTTACCATACCCAGTGTAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAGCTAGCATGACAAAAATTTTAGAGCCTTTTAGGAAGCAAAATCCAGACATGGTTATTTATCAATACATGGATGATCTATATATAGGATCTGACTTGGAATTAGGACAGCATAGGACAAAAATAGAGGAACTGAGACAACATCTATTGAGGTGGGGGTTTACCACACCAGACAAGAAGCATCAGAAAGAACCTCCATTCCTCTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAACACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTAGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAGTTATGTAAACTCCTTAGAGGAACCAAATCACTAACAGAAGTAGTACCACTAACAAGAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAAAAGCAGGGACAAGGCCAGTGGACTTATCAGATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATAAAGGGTACCCACACTAATGATGTAAAACAATTAACACAGGCTGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACATTCTATGTCGATGGGGCAGCCAATAGGGATACTAAATTAGGAAAAGCAGGATATGTTACTGACAGGGGAAGACAAAAAATTGTCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTACCTAGCTCTGCAGGATTCAGGATCAGAAGTAAACATAGTATCAGACTCACAGTATGCAATAGGAATTCTTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGACATGGGTGCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTATTATTCTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCCATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGTTAAAAGGAGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTGGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTGGCCAGTGGATATATTGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATATTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAACACAATACATACAGACAATGGCAGCAACTTCACTAGCACTGCGGTTAAAGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGGGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGAGGTACAGTGCAGGGGAAAGAATAGTAGACATGATAGCATCAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGACTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAA,1435,4446,FFRENLAFPQGKAREFSSEQTRADSPTSRELQVWGRDNNSLSEAGDNRQGTISFNCPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTKEAELELAENREILKETVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESEIVSQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESINKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCTATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTGAAAAAGAAAAAATCAGTAACGGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCGAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGATAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAACCTATAGTGCTGCCGGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAGTTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAAACAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTAAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTCAAACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAGCCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCGGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTGACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAAAAGACTGAGTTACAAGCAATTCACCTAGCTTTGCAGGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGATAGTCAGTCAAATAATAGAGCAGTTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAGGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAGGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTTATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTGTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAA -MK116110.1,vif,4349,4927,forward,0.546875,0,MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAATGCATGGAAAAGCTTAGTAAAGCACCATATGCATGTTTCAAGGAAAGTTGAGAGATGGGTTTATAAACATCACTATGAAAGTACTAATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAAAAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAAGAGCTATAATACACAAGTAGACCCTGAAGTAGCAGACCAACTAATCCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAAAGCCATAGTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCCCTACAGTACTTGGCATTAGCAGCATTAGTAAAATCAAAAAAGACAAAGCCACCTTTGCCTAGCGTTACGAAGCTGACGGAGGATAGATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAG,4391,4969,MENRWQVMIVWQVDRMRIKTWKSLVKHHMYVSKKAKGWLYRHHYQSIHPRISSEVHIPLGEASLVIKTYWGLHTGEREWHLGQGVSIEWRKGRYNTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCEYQAGHNKVGSLQYLALTALRTPKKIKPPLPSVRKLTEDRWNKPQKTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAAAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAGCTAAGGGATGGTTGTATAGACATCACTATCAAAGCATTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGAGGCTAGCTTGGTAATAAAGACATATTGGGGTCTGCATACAGGAGAAAGAGAATGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAGGAAGATATAACACACAAGTAGACCCAGGCCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAAGAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -MK116110.1,vpr,4867,5157,forward,0.514285714285714,0,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP,ATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGGATATGGCTTCAGAGCTTAGGACAATACGTCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTTTGCAACAAATGCTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACGAGGGAGAACAAGAAATGGAGCCAGTAGACCCTAG,4909,5199,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRTWLHGLGQYIYENYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGITLQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGACATGGCTCCACGGATTAGGGCAATATATCTATGAAAATTATGGGGACACTTGGGCAGGAGTGGAGGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGGATTGGGTGTCGACATAGCAGAATAGGCATTACTCTACAAAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK116110.1,tat_exon1,5138,5352,forward,0.5890410958904109,0,MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKX,ATGGAGCCAGTAGACCCTAGCCTAGCGCCCTGGAAGCACCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGCTATTGTAAAAAGTGCTGCTTACATTGCCAAGTTTGTTTCACAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCA,5180,5394,MEPVDPRLEPWKHPGSQPKTACNTCYCKKCCFHCQVCFTKKALGISYGRKKRRQRRRAPQDRQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACGGCTTGTAACACTTGCTATTGTAAAAAATGTTGCTTTCATTGCCAAGTTTGTTTCACAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK116110.1,rev_exon1,5277,5352,forward,0.46153846153846145,0,MAGRSGDSDEDLLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCA,5319,5394,MAGRSGDSDEELLRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK116110.1,vpu,5369,5614,forward,0.536144578313253,0,MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL,ATGCAATCTTTGCAAATAGGAGCAATAGTAGCATTAGTAGTAGGAACAATAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAAGAAAATAGATAGAATAATAGATAGAATAGTAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAG,5411,5656,MHSLQILGIVALVVAGIIAIVVWSIVIIEYRKILRQRKIDRLIDRIIERAEDSGNESEGDQEELSALVEMGHLAPWDIND**,ATGCACTCTTTACAAATATTAGGAATAGTAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAG -MK116110.1,env,5532,8072,forward,0.5598388952819338,0,MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL,ATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAGTGCTGCAACAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACGCATGCCTGTGTACCCACGGACCCCAACCCACAAGAAGTATTATTGGGAAATGTGACAGAAGATTTTAATGCATGGAAAAATAACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTTTGTGTTATTTTGCATTGCACTGATGTCAACAATACTAGAAATGGGATGACAGGAGAACTAAAAAACTGCTCTTTCAATATCACCACAAAAATAACAAATAAGGTACAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTACCAATAAATAATAAGGATAATGATACTAGCTTTAATAATAATAGCTATAGGTTGATAAGTTGTAACACCTCAGTTATTACACAGGCTTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTACTGTACCCCGGCTGGTTATGCAATTCTAAGGTGTAACAATGAGACATTCAGTGGAAAAGGGCCATGTACAAATGTCAGCTCAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGCAGTCTAGCAAAACAGGAGGTAGTAATTAGATCTCAAAATTTCTCGGACAATGTTAAAACCATAATAGTACAGCTGAAGACCCCTGTAAAAATTAACTGTACAAGGCCCAATAACAATACAAGAAAAAGTATACATGCAGGACCAGGGAAAGTAATTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGCAACATTAGTGCAGCAGAGTGGAATGATACTTTAGGACAGATAGTTACAAAATTACAAGAACAATTTGGGAATAAAACAATAGTCTTCAATCAATCGTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTTTACTGTAATTCAACACAACTGTTTAATAGTACTTGGAATAATAATGGTACTAATACTTGGAATAGTACAGGTAATATCACACTCCCATGTAAAATAAGGCAAATTGTAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCTCCTCCCATCCGTGGACAAATTAAATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAACGAGAGTGAGAGCGAAACCTTCAGACCTGGCGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGACTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAACACTGGGAGCTGTGTTCCTTGGGTTCTTGGGAACAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGCAATAAATCTCTGAATGAAATTTGGGATAACATGACCTGGATGGAGTGGGAAAAAGAAATTAGTAATTACACACAATTAATATACACTTTAATTGAAGAATCGCAGAGCCAGCAAGAAAAGAATGAACAAGAATTATTGGCACTAGATAAGTGGGACAGCTTGTGGAGTTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAATAGGGTTAAGAATAGTTTTTACTGTACTTTCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTGTCATTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTAGTACAAAGAGCTTGTAGAGCTATTCTCCACATACCTGTAAGACTAAGACAAGGCTTAGAAAGAGCTTTGCTATAA,5574,8123,MRVKEIRKNCQHLWRWGILLLGILMISSAAENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWTNNMAEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLRNTTNTNSTAEEMEAKGEMKNCSFNITTSIRNKLQKEYALFYKLDIVPINNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFSGNGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTDNAKTIIVQLKEPVEINCTRPNNYTRKRITMGPGRVYYTTGEIIGDIRRAHCNISSTKWNNTLGQIVKKLKEQFNNNTIVFKKSSGGDPEIVMHSFICGGEFFFCNSTKLFNSTWNSTEGNDDGEERNITLPCRIKQIVNMWQEVGKAMYAPPIGGQIRCTSNITGLLLTRDGGNQNGTNETEIFRPGGGNMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLDEIWNNMTWMQWEREINNYTGLIYTLIEESQNQQEKNELDLLQLDKWASLWNWFDITNWLWYIKIFIMIVGGLVGLRIIFTVLSIVNRVRQGYSPLSFQTHLPAPRGPDRPGGIEEEGGERDRDTSGRLVDGFLAIFWVDLRNLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVLQRVYRAILNIPTRIRQGLERALL*,ATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAGTGCTGCAGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAAYATGTGGACAAATAACATGGCAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACTCCACTCTGTGTTACTTTAAATTGCACTGATTTGAGAAATACTACTAATACCAATAGTACCGCCGAGGAAATGGAGGCGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCACCACAAGCATAAGGAATAAGTTGCAGAAAGAATATGCACTCTTTTATAAACTTGATATAGTACCAATAAATAATGATAATACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACCCAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAGTTCAGTGGAAACGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGCTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTTACAGACAATGCTAAAACCATAATAGTACAGCTGAAAGAACCTGTAGAAATTAATTGTACAAGACCTAACAACTATACAAGGAAAAGAATAACTATGGGACCAGGGAGAGTATATTATACAACAGGAGAAATAATAGGAGATATAAGACGAGCACATTGTAACATTAGTAGCACAAAATGGAATAACACTTTAGGACAGATAGTTAAAAAATTAAAAGAACAATTTAACAATAATACAATAGTCTTTAAGAAATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTATTTGTGGAGGGGAATTTTTCTTCTGTAATTCAACAAAACTGTTTAATAGTACTTGGAATAGCACTGAAGGAAATGACGATGGAGAGGAAAGAAATATCACACTCCCATGCAGAATAAAACAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCGGAGGACAAATTAGATGCACCTCAAATATTACAGGGCTGCTATTAACAAGAGATGGAGGTAACCAAAATGGGACCAACGAGACTGAAATCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAACTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCTTAGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGATGAGATTTGGAATAACATGACCTGGATGCAATGGGAAAGAGAAATTAACAATTACACAGGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGATTTACTGCAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGATTGGTAGGTTTAAGAATAATTTTTACTGTACTTTCTATAGTGAATAGGGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTATTACAAAGAGTTTATAGAGCTATTCTCAACATACCTACAAGAATCAGACAGGGCTTGGAAAGGGCTTTGCTATAA -MK116110.1,tat_exon2,7654,7746,forward,0.4838709677419355,0,RPASQPRGDPTGPKESKKTVERETETDPHA,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAG,7705,7797,RPTSQPRGDPTGPEESKKKVERETETHPDA*,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAG -MK116110.1,rev_exon2,7655,7930,forward,0.48913043478260865,0,DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAG,7706,7981,DPPPSPEGTRQARRNRRRRWRERQRHIRTLSGWILSNFLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK116110.1,nef,8074,8694,forward,0.48142857142857154,0,MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTGTAAGGGAAAGAATAAGAAGAGCTGGGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGACAAACATGGAGCAATCACAAGTAACAATACACCAGCTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTTAGGCCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATGATATACTCCCAGCAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGACCAGGGGTCAGGTTTCCACTGACCTTTGGATGGTGCTTCAAACTAGTACCACTTGAGACAGAGCAGGTAGAAGCGGCCACTGGAGGAGAGAACAACTGCTTGTTACACCCTTTGAACCAGCATGGGATGGATGACCCGGAGAGAGAAGTACTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAGAGCCAAAGAGCTGCATCCGGAGTACTACAAAGACTGCTGA,8125,8751,MGSKWSKMSGWPAVRERMRRTKPAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEGEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEKEVLMWKFDSRLALHHMAREKHPEYYKDC*,ATGGGTAGCAAGTGGTCAAAAATGAGTGGGTGGCCTGCTGTAAGGGAAAGAATGAGAAGAACTAAGCCAGCTGAGCCAGCAGCAGATGGAGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGGGGAGGTGGGTTTCCCAGTCAAACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACCATACACAAGGCTACTTCCCTGATTGGCAGAATTACACACCAGGGCCAGGGGTCAGATTCCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGACAAGGTAGAAGAGGCCAATGAAGGGGAAAACAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAAGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTGGCATTGCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAGGACTGCTGA -MK115527.1,gag,683,2182,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115527.1,pol,1975,4986,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115527.1,vif,4931,5509,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115527.1,vpr,5449,5739,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115527.1,tat_exon1,5720,5934,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115527.1,rev_exon1,5859,5934,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115527.1,vpu,5951,6196,forward,0.727586206896552,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115527.1,env,6114,8654,forward,0.5241695303550973,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115527.1,tat_exon2,8236,8328,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115527.1,rev_exon2,8237,8512,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115527.1,nef,8656,9270,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK114997.1,gag,210,1718,forward,0.24035785288270395,0,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACACAATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGCACTTTAAATGCATGGGTAAAAGTGATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGATCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCCTCCATAATGGCGCAAGGAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK114997.1,pol,1511,4522,forward,0.22266401590457252,0,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAGAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGACAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACCAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTCCATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTAACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCCGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK114997.1,vif,4467,5045,forward,0.3969072164948453,0,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTAGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCCTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK114997.1,vpr,4985,5275,forward,0.28125,0,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK114997.1,tat_exon1,5256,5470,forward,0.547945205479452,0,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEX,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK114997.1,rev_exon1,5395,5470,forward,0.5925925925925926,0,MAGRSGDSDEELLTAVRIIKRLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK114997.1,vpu,5487,5732,forward,0.7790697674418607,0,MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM,ATGCATGCCTTAAAAATAGCAGCAATAGTAGGATTAGTAGTAGCAACAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK114997.1,env,5650,8206,forward,1.2346375143843504,1167,MHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,ATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGAAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAGTTTAAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAGACTTGATGTAGTATCAATAGATGAAGATAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAACCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGAGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAAAAAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAACACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCTCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACATTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAGTGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAGGCAGTGGGAAAAGGAAATTGACAATTACACAGACACAATATATAACTTAATTGAACTATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAGTTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTACTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK114997.1,tat_exon2,7788,7880,forward,0.5806451612903225,0,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK114997.1,rev_exon2,7789,8064,forward,0.423913043478261,0,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK114997.1,nef,8208,8849,forward,0.7375565610859729,0,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTTCAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTTCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGTCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAATATTTCAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115518.1,gag,739,2238,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115518.1,pol,2031,5042,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115518.1,vif,4987,5565,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115518.1,vpr,5505,5795,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115518.1,tat_exon1,5776,5990,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115518.1,rev_exon1,5915,5990,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115518.1,vpu,6007,6252,forward,0.727586206896552,0,MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115518.1,env,6170,8710,forward,0.5247139588100684,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115518.1,tat_exon2,8292,8384,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115518.1,rev_exon2,8293,8568,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115518.1,nef,8712,9326,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115065.1,gag,221,1729,forward,0.2507968127490041,0,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCCAAGGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGCTGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115065.1,pol,1522,4533,forward,0.20775347912524844,0,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGATTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTATCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAAATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCTATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAGAAAAGTACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115065.1,vif,4478,5056,forward,0.3969072164948453,0,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115065.1,vpr,4996,5286,forward,0.3125,0,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115065.1,tat_exon1,5267,5481,forward,0.5945945945945945,0,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEX,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115065.1,rev_exon1,5406,5481,forward,0.5925925925925926,0,MAGRSGDSDEELLTAVRIIKRLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115065.1,vpu,5498,5743,forward,0.7790697674418607,0,MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM,ATGCATGCCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115065.1,env,5661,8261,forward,0.6331111111111118,0,MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,ATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAGTACTTGGAATGGTACTGACAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATAGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACCTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115065.1,tat_exon2,7843,7935,forward,0.5806451612903225,0,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115065.1,rev_exon2,7844,8119,forward,0.423913043478261,0,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115065.1,nef,8263,8904,forward,0.7104072398190044,0,MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTACCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAGAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115464.1,gag,794,2296,forward,1.495,637,MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ,ATAAGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAAGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACAAAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATAGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATAAGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATAGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCACAGGATGTAAAAAATTAGATGACAGAAACCTTATTAGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTAAGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAAGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115464.1,pol,2089,5100,forward,0.3214711729622268,1716,AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED,TTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATAAGTTTGCCAGGAAGATAGAAACCAAAAATGATAGAAGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATAACTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTAGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAACCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAAAAAAGATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAGAAAATTAGTAGATTTCAAGGAACTTAATAAAAGAACTCAAGACTTCTAAGAAGTTCAATTAAGAATACCACACCCCGCAAGGTTAAAAAAGAAGAAATCAATAACAGTACTAGATGTAGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAAGAAGTATACTGCATTTACCATACCTAGTATAAATAATAAGACACCAGAGATTAGATATCAGTACAATGTGCTTCCACAGGGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATAGATGACTTGTATGTAAGATCTGACTTAGAAATAAGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTAGAGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATAAGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTAAGAAAATTGAATTGAGCAAGTCAGATTTATGCAGAGATTAAAGTGAGACAATTATGTAAACTCCTTAAAGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAAGAAATTCTAAAAGAACCAGTACATGAAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGAGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGAGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGAGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATAAGATACCTAGTGGACAGAATATTGGCAAGCCACCTAGATTCCCGAGTAAGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATAGTACCAATTAGAAAAAGAGCCTATTGTAGGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAAAGAGACTAAATTAAGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAAGAATAATTCAAGCACAACCAGATAAGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTTGCATAGGTACCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTGCTAGAATCAGGAAAGTCCTATTTTTAGATAGAATAGATAAGGCCCAAGAAGAGCATAAGAAATATCACAATAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAAAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAAAAAGAAGCCACGCATAGACAAGTAGACTGTAGTCCAAGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTAGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTAGTAGGCAAAGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAAGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115464.1,vif,5045,5623,forward,1.494270833333335,248,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAAGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTAGGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGTCAAGGAGTCTCCATAGAATGGAAGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115464.1,vpr,5563,5853,forward,0.375,0,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115464.1,tat_exon1,5834,6048,forward,0.45833333333333326,129,MRILGQE,ATAGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115464.1,rev_exon1,5973,6048,forward,0.46153846153846145,0,MAGRSGDSDEDLLKTVRLIKYLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115464.1,vpu,6065,6310,forward,1.8817073170731702,124,MQSLYILTIVALVVAAILAIVV,ATGCAATCTTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTAGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAAGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115464.1,env,6228,8798,forward,1.8316091954022926,1449,MTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST,ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATAAGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGAAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTTCTAATACTACTTAGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATAAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAAAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAAGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATAGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAAGAAGCCATACGAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAAGACCAGGAAGAGCATTTTATACAACAGGAGATATAATAAGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAGTAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAAAGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAAAGAAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTAGACTAAAAATGGTACTGATAGTTGGCAGTCTAATGATACTCAGAATAGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCCATCAGTAGACAAATTAACTGTACATCAAATATTACAGGGCTAGTTTTAACAAGAGATAGGAGGAATGAAACTAAGACCTTTAGACCTGGAAGAGAAAATATGAAGGATAATTGGAGAAGTAAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAAGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAGAAAAGAGCAGTAAGACTAGGAGCTATGTTCCTTAAGTTCTTAGGAGCAGCCAGAAGCACTATAGGCGCAGCGTCGATAGCGCTGACGGAACAGGCCAGACGAGTCTTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTAAGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATCGTAGTTGAGGTAGGCATAACAAAAATTACAAAAGTCTAGATGACATTTAGGATAACATGACCTAGATAGAGTAGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAAAAAAGAATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115464.1,tat_exon2,8380,8472,forward,0.7741935483870968,37,RPSSQPREEPTGPKE,AGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115464.1,rev_exon2,8381,8656,forward,0.5217391304347827,45,RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE,GACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115464.1,nef,8800,9450,forward,1.1589371980676328,178,MTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC,ATAGGTGGCAAGTGGTCAAAAAGTAGTAAGGTTAAATAGAATGCAGTGAAAGAAAGAATAAGACGAGCTCAGCCAACAGCAGATAAAGAACGAGCTGAGCCAGCAGCAGATAAGGTAAGAGCAGCATCTAGAGACCTAGAAAAATATGGAGCACTTACAAGTAAGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGATGAGGTAGGTTTTCCAGTCAGACCTCAGTTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAAGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATAGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGATTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115530.1,gag,746,2245,forward,0.2675944333996021,0,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115530.1,pol,2038,5049,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115530.1,vif,4994,5572,forward,0.37823834196891193,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115530.1,vpr,5512,5802,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115530.1,tat_exon1,5783,5997,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115530.1,rev_exon1,5922,5997,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115530.1,vpu,6014,6259,forward,0.6931034482758622,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115530.1,env,6177,8717,forward,0.528604118993135,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115530.1,tat_exon2,8299,8391,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115530.1,rev_exon2,8300,8575,forward,0.423913043478261,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115530.1,nef,8719,9333,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115520.1,gag,695,2194,forward,0.2675944333996021,0,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115520.1,pol,1987,5003,forward,1.448607975921763,1225,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDMGNGQYSL,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATATGGGAAATGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115520.1,vif,4948,5526,forward,0.37823834196891193,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115520.1,vpr,5466,5756,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115520.1,tat_exon1,5737,5951,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115520.1,rev_exon1,5876,5951,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115520.1,vpu,5968,6213,forward,0.6931034482758622,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115520.1,env,6131,8671,forward,0.5251716247139588,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115520.1,tat_exon2,8253,8345,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115520.1,rev_exon2,8254,8529,forward,0.423913043478261,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115520.1,nef,8673,9287,forward,0.5478260869565217,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115503.1,gag,817,2316,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115503.1,pol,2109,5120,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115503.1,vif,5065,5643,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115503.1,vpr,5583,5873,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115503.1,tat_exon1,5854,6068,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115503.1,rev_exon1,5993,6068,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115503.1,vpu,6085,6330,forward,0.727586206896552,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115503.1,env,6248,8788,forward,0.5241695303550973,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115503.1,tat_exon2,8370,8462,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115503.1,rev_exon2,8371,8646,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115503.1,nef,8790,9404,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115570.1,gag,687,2186,forward,0.2616302186878725,0,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCGACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115570.1,pol,1979,4990,forward,0.14811133200795235,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115570.1,vif,4935,5513,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115570.1,vpr,5453,5743,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115570.1,tat_exon1,5724,5938,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115570.1,rev_exon1,5863,5938,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115570.1,vpu,5955,6200,forward,0.6931034482758622,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115570.1,env,6118,8658,forward,0.5325714285714285,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCTCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAGGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115570.1,tat_exon2,8240,8332,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115570.1,rev_exon2,8241,8516,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115570.1,nef,8660,9274,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115509.1,gag,555,2054,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115509.1,pol,1847,4858,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115509.1,vif,4803,5381,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115509.1,vpr,5321,5611,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115509.1,tat_exon1,5592,5806,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115509.1,rev_exon1,5731,5806,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115509.1,vpu,5823,6068,forward,0.727586206896552,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115509.1,env,5986,8526,forward,0.5241695303550973,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115509.1,tat_exon2,8108,8200,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115509.1,rev_exon2,8109,8384,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115509.1,nef,8528,9142,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115702.1,gag,246,1781,forward,0.342940038684721,0,MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGCGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAGGAAACGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGCTTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAAACAAATAATGGGACAACTCCAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAGAGGATAGATGTAAAGGATACCAAAGAAGCTTTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCCGCTGACACAGGAAACAACAGCCAAGTCAGCCAAAATTACCCCATAGTGCAGAACATGCAGGGACAAATGGTACATCAGGCCATATCACCCAGAACCCTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCATTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAGGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCAGGCAGGACCTGTTGCACCAGGCCAGATAAGGGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACACATAATCCACCCGTCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCTGAGCAAGCTTCACAGGAAGTAAAAGGTTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCAGCCATAAGGCAAGGGTTTTGGCAGAAGCAATGAGCCAAGCAACAGGTGCACATGCCATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115702.1,pol,1544,4585,forward,0.20128078817733996,0,FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAAAGATAGGGGGGCAAATAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGAAATGGCGTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATAGCCATAGAAATTTGTGGACATAAAGCAATTGGTACAGTATTAGTAGGACCTACACCTGTCAATATAATTGGAAGAAATCTATTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAGTTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATAGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGATTAAAAAAGAAAAAATCAATAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATATTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGACGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGAGGACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAGGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAGATGGACAGTACAGCCTATAAAGCTGCCAGAGAAAGAAATCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAAACAATTATGTAAACTCCTTAGGGGAACCAAAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGAATTAATAGCAGAAATACAGAAGCAAGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCGAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAGAAAATATCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAATATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAGTACTCCTCCCCTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCATCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATATTACTGACAGAGGAAGACAAAAGGTTGTCACCCTAAATGACACAACCAATCAAAAGACAGAGTTACAAGCAATTCTTCTAGCATTGCAGGATTCAGGATTAGAAGCAAACATAGTGACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGATCTACCTGACATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAGCAAGTAGATAAATTAGTCAGTACTGGGATTAGGAAAGTATTATTTTTAGATGGAATAGATAGGGCCCAAGAAGAGCATGAGAGATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTCAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAATAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACCACAGTTAAGGCCGCCTGTTGGTGGGCGGGGGTCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTGGTAGAATCTATGAATAAAGAATTAAAGAAAATAATAGGACAGGTCAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCAAAAATTCAAAACTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCGGTAGTAATACAGGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115702.1,vif,4530,5108,forward,0.34375,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAACTAAGGGATGGTTTTATAGACATCACTATGAGAGCACTCATCCAAAAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTGAGTTGGTAGTAACAACATATTGGGGTTTGCAGCCAGGGGAAAGGGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAGGGAGATATAGAACACACGTGGACCCTAACCTAGCAGACCAACTAATTCATCTGCATTACTTTGATTGTTTTTCAGAATCTGCTATAAGACATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAAGAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACTAAACTAACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115702.1,vpr,5048,5338,forward,0.34375,0,MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAACTTAAGCAGGAAGCTGTTAGGCATTTTCCTAGGCCATGGCTTCATAGCTTAGGGCAATATATCTATGAAACTTATGGGGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATGCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATCCCACAGAGGAGAGCAAGAAATGGATCCAGTAGATCCTAA,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115702.1,tat_exon1,5319,5533,forward,0.6818181818181821,0,MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKX,ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115702.1,rev_exon1,5458,5533,forward,0.6923076923076923,0,MAGRSGDGDEDLLKAVRLIKTLYQSX,ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115702.1,vpu,5550,5807,forward,0.945263157894737,0,MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND,ATGTTATCTTTAGAAGTAATAGTAGCAATAACAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGACCATAGTACTTATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAAGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAA,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115702.1,env,5716,8273,forward,0.6706208425720628,39,MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE,ATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAATGCTACAGAACAATTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACAACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTATTAATAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAAATGCAAGAGGACATAGTCAGCTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACCTTAAATTGCACTAATTTGACCATTGAGCCAAACAATGCTACTAAAGCCAATATTAGTGGGAGGTTAGAGGGGAAAGGAGAAATGACAAACTGCTCTTTCAATGTCACCACAAGCCTAAGAGATAAGAGGAAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTAGCAACAGGTGAAAATAATAACAGCTTTAGGTTGATAAGTTGTAATACCTCAGAGATTACACAGGCCTGTCCAAAGGTATCATTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAAAAGTTCAATGGAACAGGAAAATGTAACAATGTCAGCATAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAGTTAGATCTGCCAATTTCTCAGACAATACTAAGACCATAATAGTACAGCTGAACAAAACTGTAGTAATTAATTGTACAAGACCCAACAACAATACAAGGAGAAGTATACATATAGCACCAGGGAGAGCATTTTATGCAACAGGAGATATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAAAGAAGATTGGAATACCACTTTAAACCAGGTGGCTAAAAAATTACAAGAACAATTTGAGAATGCAACAATAGACTTTAAACCATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACGGAACTATTTTCTTGGAATGCTACAACAAAACTGTTTACTTGGAATGCTACAAATAGCAATAATGGAACCATCATACTCCCATGTAGAATAAAACAAATTATAAACATGTGGCAAGAGGTAGGAAAAGCAATGTATGCCCCTCCCATTCGTGGACAAATTAGATGTTCGTCAAATATTACAGGACTGCTATTAACAAGAGATGGTGGGACTAACGGGACAGGGAACAGGAATGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAGAAATTAAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGACCATAGGAGCTATGTTCCTTGGGTTCCTGGGGGCAGCAGGAAGCACTATGGGCGCAGCATCACTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCGATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATAAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGGTACCTAAGAGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATATTAGTTGGAGTAATAGAACTCTGAATAACATTTGGGACAATTTGACTTGGATGCAGTGGGATACAGAAATTAACAATTACACAAACAAAATATACCAATTACTTGAGGAAGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAATTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTACATAAAAATATTCATATTAATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTAAATGCCACAGCCATAGTAGTAGCTGAGGGGACAGATAGGATTATAGAATTAGCACAAAGAATTTGTAGAGCAGAATAAGACAGGGCTTGAAAAGGCTTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115702.1,tat_exon2,7874,7966,forward,0.29032258064516125,0,RPASQSRGDPTGPKEPKKKVERETETDPTD,AGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115702.1,rev_exon2,7875,8150,forward,0.326086956521739,0,DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE,GACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115702.1,nef,8275,8895,forward,0.5454976303317536,0,MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC,ATGGGTGGCAAGTGGTCAAAACATAGTAAGAGTGAATGGGCTGATGTAAGGGAAAGAATGGCACAAACTGAGGCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGATCTGGAAAGACATGGAGCAATCACAAGTAGCAATACAGCAACTAACAATGCTGCTTGTGCTTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAAACCTCAGGTGCCTTTGAGACCAATGACCTACAAGGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATTCCCAAAAAAGACAAGACATCCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGAACCAGATTCCCACTGTGCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTGGAAGAAGCCAATAAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACACCGAGAGAGAGGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACGTAGCCAGAGAGAAACATCCGGAGTACTTCAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115095.1,gag,188,1696,forward,1.8384000000000005,806,MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC,ATAGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGGGAGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAAAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAACCCATATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAAGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAAGCTGCAGAATAAGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAGGAACCAAGAAGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATAGATAATCCTGAGGTTAAATAAAATAGTAAAAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAGGAACCCTTTAAGGATTATGTAGACCGGTTCTATAAAACTCTAAAGGCTGAACAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAAGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATGGTGCAAGGAGGCAATTTAAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAGAAATGTGGAAAGAAAGGACACCAAATGAAAGATTGTACTAAGAGACAGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115095.1,pol,1489,4500,forward,0.45376984126984143,1865,KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK,TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAAGAAGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAAAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATAATAGAAGGAATTAGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAAAAATTTGTAGACATAAAGCTATAAGTACAGTATTAATAAGACCTACACCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAAGAAAGGAAAATTACAAAAATTAGGCCTAAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATAGAGAAAATTAGTAGATTTCCGAGAACTTAATAAAAGAACACAAGACTTTTAAAAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAAGAGTTAGATATCAGTACAATGTGCTTCCACAAAGATAGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAAGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAAGTAAAGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTAAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAAGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAGGAGATTCTAAAAGAACCAGTACATAGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGAGGAGACCAATGGACATATCAAATTTATCAGAAGCCATTTAAAAATCTGAAAACAAAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATAGAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAAGAAATGTAGTAGACAGAGTATTGGCAAGCCACCTAGATTCCTGAGTAGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAGAGAGACTAAATTAAGAAAAGCAAGGTATGTTACGGACAGAAGAAGACAAAAAGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAAATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTAGTAGAATCAGAAAAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAAGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTAGTAGGCAAGGATCAAGCAGAAATTTAGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAAGGGGGATTGGGGGGTACAGTGCAGAGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTAGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115095.1,vif,4445,5023,forward,1.8546875000000016,332,MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGAGTTTACAGACATCACCATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAAGGGAAGCAAGATTAGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGGCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115095.1,vpr,4963,5253,forward,0.5,0,MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS,ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAA,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115095.1,tat_exon1,5234,5448,forward,0.6756756756756757,35,MTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEX,ATAGAGCCAGTAGATCCTAACTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115095.1,rev_exon1,5373,5448,forward,0.5925925925925926,0,MAGRSGDSDEELLTAVRIIKRLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115095.1,vpu,5465,5710,forward,1.8817073170731702,122,MHALEIAAIVRLVVAAIIAIVV,ATGCATGCCTTAGAAATAGCAGCAATAGTAAGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115095.1,env,5628,8228,forward,1.8735260115606882,1426,MTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD,ATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATGAGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAAGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATGAATTAAATCTAAATTGCCCTAACAATAATACTTGTAGTAATAATACTAAATATAATATGACGGAAGAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAAGTAGTGGAAATACTACAGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAAGGAGGGCATTTTATAGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTAGGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAAAGGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAAGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAATACTTAGAATGGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATAGTAAGAGCAACAATAGTAGTAATGATACAAAGACCTTCAGGCCTAGAAGAGGAGATATGAAGGACAATTAGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAGAAAGAAAAAAGAGCAATAGGACTTAGAGCTTTCTTCCTTAAGTTCTTAGGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTGAAGATTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTAATGTGCCCTAAAATGTTAGTTAGAGCCCTAGATAGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAGGAAAAAGAAATTGACAATTATACAGACACAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTAGCATTAGATAAGTAGGCAAGTTTGTAGAATTAGTTTGACATTACACAGTGGCTATAGTATATAAAAATATTCATAATGATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGAGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAAAGGACAGATAAGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAAGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115095.1,tat_exon2,7810,7902,forward,0.6774193548387097,0,RPSSQPRGDPTGPKEQKKEVERKTEAHPRD,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115095.1,rev_exon2,7811,8086,forward,0.6195652173913044,69,ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115095.1,nef,8230,8871,forward,0.8680555555555556,40,MRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,ATAGGTAACAAGTTGTCAAGAAGGCTCAGGGCTAGATGGCCTGCCATAAAAGAAAGAATGAGACAAGCTAGGCCAGTAAGAAAGCCAGAGCCAGCAGCAACTAAGGTAAGAGCAGCATCTCGAGACCTAGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGGAAGAAAAAGAGGTAGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAAGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAAGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATAGAAGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115490.1,gag,549,2048,forward,0.2675944333996021,0,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115490.1,pol,1841,4852,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115490.1,vif,4797,5375,forward,0.37823834196891193,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115490.1,vpr,5315,5605,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115490.1,tat_exon1,5586,5800,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115490.1,rev_exon1,5725,5800,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115490.1,vpu,5817,6062,forward,0.6931034482758622,0,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115490.1,env,5980,8520,forward,0.528604118993135,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115490.1,tat_exon2,8102,8194,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115490.1,rev_exon2,8103,8378,forward,0.423913043478261,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115490.1,nef,8522,9136,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MK115576.1,gag,468,1967,forward,0.2675944333996021,0,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115576.1,pol,1760,4771,forward,0.15109343936381703,0,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115576.1,vif,4716,5294,forward,0.36269430051813467,0,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MK115576.1,vpr,5234,5524,forward,0.2886597938144331,0,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115576.1,tat_exon1,5505,5719,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115576.1,rev_exon1,5644,5719,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKILYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MK115576.1,vpu,5736,5981,forward,0.727586206896552,0,MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115576.1,env,5899,8439,forward,0.5247139588100684,0,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MK115576.1,tat_exon2,8021,8113,forward,0.29032258064516125,0,RPTSQPRGDPTGPKESKKKVEKETETDQFD,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MK115576.1,rev_exon2,8022,8297,forward,0.3913043478260869,0,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MK115576.1,nef,8441,9055,forward,0.5333333333333332,0,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -OQ092466,gag,825,2360,forward,0.21325536062378148,0,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTAGCCTGTTAGAAACAGCAGAAGGCTGTAGACAAATATTGGGACAGTTACAACCGTCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACATTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCCTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGCAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGCCAGGTTAGACACACAGGAAACAGCAGCCAGGTCAGCCAAAATTACCCTATAGTACAGAACCTTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGACTACATCCAGTGCATGCAGGGCCCATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTACACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGGGGACCCGGACATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCACGTAACAAATTCAAGTGCCATAATGATGCAGAGGGGCAATTTTAGAAACCAAAGAAAGGCTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092466,pol,2147,5164,forward,0.14711729622266412,0,FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAATCTTAGAGCCTTATAGAACACGAAATCCAGAAATGGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGATTTACTACCCCAGACAAAAAACATCAAAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAAACAAAGACAGCTGGACTGTCAATGACATACAGAAACTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCGCTAACAGAAGAAGCAGAGTTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGGGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGGAAGGACAATGGACATTTCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGCGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAAATTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATACGCATTGGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTGATAAAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGAAAGTATTATTTTTAGATGGAATAGAGAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGCTAAAAGGAGAAGCCATACATGGACAGGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATCCAAACCAAAGAACTACAAAAACAAATTATAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTTATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092466,vif,5109,5687,forward,0.203125,0,MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAGGAAAGCTAAGGGATGGTTTTATAGACATCACTTTGAAAGCAATCATCCAAAAATAAGTTCAGAAGTACACATCCCACTGGAGGATGCTAGACTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGGGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTACTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACACATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGAGAAAGCCACCCTTGCCTAGTGTTAAGAAGCTAACAGAAGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -OQ092466,vpr,5627,5917,forward,0.21875,0,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAGCTTAGGGCAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATTCTGCAACAACTGTTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092466,tat_exon1,5898,6112,forward,0.41666666666666674,0,MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKX,ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092466,rev_exon1,6037,6112,forward,0.46153846153846145,0,MAGRSGDSDDELLKTVRLIKVLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092466,vpu,6129,6374,forward,0.6395348837209305,0,MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL,ATGCAACCTTTAACAATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATACTAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAA,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -OQ092466,env,6292,8874,forward,0.5852808988764047,0,MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL,ATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAATGCTACAGAACAACAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGGTATTAAAAAATGTGACAGAAAATTTTAATATGTGGGAAAATAACATGGTAGAACAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAGTTAACTCCACTCTGTGTTACTCTAAATTGCACTAATACCACTAGGAGTAGTGGAAATACTACCAATGAAATGAAAAACTGCTCTTTCTATACCGAAACAGACATAAGAGATAAGAAGAGAAAGGAATATGCACTTTTTTATGAACTTGATATAGTACCCATAGATGAGGATAATAAGAATAAGAGTAATAATATTAGCTATTCTAGGTTAATAAGTTGCAACACCTCAGTTATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGGCCATGTACAAATGTCAGCACAGTGCAATGTACACATGGTATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGACGAGGTAGTAATTAAATCTAGCAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTAAATGAAACTGTAAAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGACATAAGACAAGCACATTGTAACATCTCTAGAGCAAACTGGACAAACACTTTAAAACAGATAGCTGAAAAATTAGGAAAACAATTTGAGGAAAATAAAACAATAGTCTTTAATCCCTCCTCAGGAGGGGACCCAGAGGTTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATTCAACACCACTGTTTAATAGTACTTGGAAGGAGACTAATGGGATTTGGACTCGTATTGGAGAGTCAAATGATAGTGCTACTATCACACTAAATGATAGTGATACTATCACACTCCAATGTAAAATAAGGCAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAAAGGACAAATTAGCTGTTTATCAAACATTACAGGGCTGCTATTAGTAAGAGATGGTGGCAATAACACGAACGGGACCGAGATCTTCAGACCTGTAGGAGGAGAAATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAGAAAAGAGCGACATTGGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTGGAAAAAATTTGGAATAATATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACCTTACTTGAAGAATCGCAGAACCAGCAAGAAAAAAATGAAAAAGAATTATTGGAATTAGATACATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAATAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAATATTACAAAGAGCTTGTAGAGCTATTCTCCATATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -OQ092466,tat_exon2,8456,8548,forward,0.6774193548387097,0,RSTPQLRGDPTGPKESKEKVERETETDPVH,AGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -OQ092466,rev_exon2,8457,8732,forward,0.3586956521739131,0,DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE,GATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092466,nef,8876,9508,forward,0.43380952380952387,0,MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC,ATGGGTGGCAAATGGTCAAAAAGTAAGCTATTTGGATGGCCTGCTGTAAGGGAAAGAATGAGAAGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACACGGAGCAATCACAAGTAGCAATACACCAACTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCGGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCCAAAAAGACAAGAGATCCTTGATCTGTGGGTCTATCATACACAAGGTTTCTTCCCTGATTGGGATAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGTTAGTACCAGTGGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAATAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAGGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCGTCACGTGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -OQ092463,gag,801,2312,forward,0.21666666666666679,0,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAGCGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAACACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGACCAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTCAGGAACCAGAGAAAGAATGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,140,1642,MGARASVLSGGQLDRWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIAVLYCVHQKIEVKDTKEALEKIEEEQNKSKKKAQQAAANTENSSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWRCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQETIDKELYPLTALKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATAGATGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTATTGGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAAGCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTAACACAGAAAACAGCAGCCAGGTTAGCCAAAATTACCCTATAGTGCAAAATATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTGGGAGAAATTTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCCGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGTCCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTGACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCCGAAATTGCAGGGCCCCTAGGAAGAAGGGCTGTTGGAGATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092463,pol,2105,5116,forward,0.10756972111553775,0,FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAGCAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCTATAGTAGGAGCAGAAACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGACCATGAGAAATATCACAGTAATTGGAGGGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGGTATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCAGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,1435,4446,FFRENLAFPQGKAREFSSEQTRADSPTSRELQVWGRDNNSLSEAGDNRQGTISFNCPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTKEAELELAENREILKETVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESEIVSQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESINKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCTATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTGAAAAAGAAAAAATCAGTAACGGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCGAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGATAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAACCTATAGTGCTGCCGGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAGTTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAAACAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTAAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTCAAACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAGCCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCGGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTGACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAAAAGACTGAGTTACAAGCAATTCACCTAGCTTTGCAGGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGATAGTCAGTCAAATAATAGAGCAGTTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAGGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAGGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTTATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTGTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAA -OQ092463,vif,5061,5639,forward,0.296875,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTATTCATCCAAGAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAGGAGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATCGGTATTACTTTGATTGTTTTTCAGAATCTGCCATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG,4391,4969,MENRWQVMIVWQVDRMRIKTWKSLVKHHMYVSKKAKGWLYRHHYQSIHPRISSEVHIPLGEASLVIKTYWGLHTGEREWHLGQGVSIEWRKGRYNTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCEYQAGHNKVGSLQYLALTALRTPKKIKPPLPSVRKLTEDRWNKPQKTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAAAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAGCTAAGGGATGGTTGTATAGACATCACTATCAAAGCATTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGAGGCTAGCTTGGTAATAAAGACATATTGGGGTCTGCATACAGGAGAAAGAGAATGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAGGAAGATATAACACACAAGTAGACCCAGGCCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAAGAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -OQ092463,vpr,5579,5869,forward,0.31958762886597936,0,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTCCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAACAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAACCAGTAGATCCTAG,4909,5199,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRTWLHGLGQYIYENYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGITLQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGACATGGCTCCACGGATTAGGGCAATATATCTATGAAAATTATGGGGACACTTGGGCAGGAGTGGAGGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGGATTGGGTGTCGACATAGCAGAATAGGCATTACTCTACAAAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092463,tat_exon1,5850,6064,forward,0.3918918918918919,0,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKX,ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA,5180,5394,MEPVDPRLEPWKHPGSQPKTACNTCYCKKCCFHCQVCFTKKALGISYGRKKRRQRRRAPQDRQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACGGCTTGTAACACTTGCTATTGTAAAAAATGTTGCTTTCATTGCCAAGTTTGTTTCACAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092463,rev_exon1,5989,6064,forward,0.23076923076923084,0,MAGRSGDSDEELIKTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA,5319,5394,MAGRSGDSDEELLRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092463,vpu,6081,6329,forward,0.4878048780487805,0,MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL,ATGCAACCTTTAGAAATATCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG,5411,5656,MHSLQILGIVALVVAGIIAIVVWSIVIIEYRKILRQRKIDRLIDRIIERAEDSGNESEGDQEELSALVEMGHLAPWDIND**,ATGCACTCTTTACAAATATTAGGAATAGTAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAG -OQ092463,env,6244,8826,forward,0.4984072810011382,0,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL,ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTATGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATACCACTAGTACCAAGAATACCACCCCTAGTACCACTGCTAGTAGCGGGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGAAGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAAGGATTCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTCTTGGAAGGATGAGTCAAATGGCACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAATAATGAGAGCAACACCACCGAGATTTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAGAGAAATTGACAATTACACAAGCTTGATATACACTTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGACAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCAACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA,5574,8123,MRVKEIRKNCQHLWRWGILLLGILMISSAAENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWTNNMAEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLRNTTNTNSTAEEMEAKGEMKNCSFNITTSIRNKLQKEYALFYKLDIVPINNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFSGNGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTDNAKTIIVQLKEPVEINCTRPNNYTRKRITMGPGRVYYTTGEIIGDIRRAHCNISSTKWNNTLGQIVKKLKEQFNNNTIVFKKSSGGDPEIVMHSFICGGEFFFCNSTKLFNSTWNSTEGNDDGEERNITLPCRIKQIVNMWQEVGKAMYAPPIGGQIRCTSNITGLLLTRDGGNQNGTNETEIFRPGGGNMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLDEIWNNMTWMQWEREINNYTGLIYTLIEESQNQQEKNELDLLQLDKWASLWNWFDITNWLWYIKIFIMIVGGLVGLRIIFTVLSIVNRVRQGYSPLSFQTHLPAPRGPDRPGGIEEEGGERDRDTSGRLVDGFLAIFWVDLRNLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVLQRVYRAILNIPTRIRQGLERALL*,ATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAGTGCTGCAGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAAYATGTGGACAAATAACATGGCAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACTCCACTCTGTGTTACTTTAAATTGCACTGATTTGAGAAATACTACTAATACCAATAGTACCGCCGAGGAAATGGAGGCGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCACCACAAGCATAAGGAATAAGTTGCAGAAAGAATATGCACTCTTTTATAAACTTGATATAGTACCAATAAATAATGATAATACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACCCAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAGTTCAGTGGAAACGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGCTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTTACAGACAATGCTAAAACCATAATAGTACAGCTGAAAGAACCTGTAGAAATTAATTGTACAAGACCTAACAACTATACAAGGAAAAGAATAACTATGGGACCAGGGAGAGTATATTATACAACAGGAGAAATAATAGGAGATATAAGACGAGCACATTGTAACATTAGTAGCACAAAATGGAATAACACTTTAGGACAGATAGTTAAAAAATTAAAAGAACAATTTAACAATAATACAATAGTCTTTAAGAAATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTATTTGTGGAGGGGAATTTTTCTTCTGTAATTCAACAAAACTGTTTAATAGTACTTGGAATAGCACTGAAGGAAATGACGATGGAGAGGAAAGAAATATCACACTCCCATGCAGAATAAAACAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCGGAGGACAAATTAGATGCACCTCAAATATTACAGGGCTGCTATTAACAAGAGATGGAGGTAACCAAAATGGGACCAACGAGACTGAAATCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAACTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCTTAGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGATGAGATTTGGAATAACATGACCTGGATGCAATGGGAAAGAGAAATTAACAATTACACAGGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGATTTACTGCAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGATTGGTAGGTTTAAGAATAATTTTTACTGTACTTTCTATAGTGAATAGGGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTATTACAAAGAGTTTATAGAGCTATTCTCAACATACCTACAAGAATCAGACAGGGCTTGGAAAGGGCTTTGCTATAA -OQ092463,tat_exon2,8408,8500,forward,0.5806451612903225,0,RPTPQPRGDPTGQKESEKKVERETETDPDH,AGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG,7705,7797,RPTSQPRGDPTGPEESKKKVERETETHPDA*,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAG -OQ092463,rev_exon2,8409,8684,forward,0.326086956521739,0,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE,GACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG,7706,7981,DPPPSPEGTRQARRNRRRRWRERQRHIRTLSGWILSNFLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092463,nef,8828,9460,forward,0.31611374407582926,0,MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,ATGGGTGGTAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGATTGCTGA,8125,8751,MGSKWSKMSGWPAVRERMRRTKPAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEGEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEKEVLMWKFDSRLALHHMAREKHPEYYKDC*,ATGGGTAGCAAGTGGTCAAAAATGAGTGGGTGGCCTGCTGTAAGGGAAAGAATGAGAAGAACTAAGCCAGCTGAGCCAGCAGCAGATGGAGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGGGGAGGTGGGTTTCCCAGTCAAACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACCATACACAAGGCTACTTCCCTGATTGGCAGAATTACACACCAGGGCCAGGGGTCAGATTCCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGACAAGGTAGAAGAGGCCAATGAAGGGGAAAACAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAAGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTGGCATTGCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAGGACTGCTGA -OQ092465,gag,855,2357,forward,0.3900199600798404,0,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGGTGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAACTAGAACGGTTTGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAACAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCAACTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAATCTCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGACTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTGCCATAATGATGCAGAGAGGCAATTTCAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,200,1699,MGARASVLSGGELDRWEKIRLRPGGKKKYRLKHVVWASRELERFAVNPGLLETSEGCRQILEQLQPSLKTGSEELKSLFNTVAVLYCVHQKIEVKDTKEALDKIEEEQNKSKKKAQQAAAGTGNSNQASQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKGFNPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSNLQEQIQWMTSNPPVPVGEIYKRWIILGLTTLVGMYSPVSILDIKQGPKDLFRDYVDRFFKTLRLEQCTQEVKGWMTDTLLVPNANPDCKTFLKALGPGPSLEELTTPGPGVGGPSHKARVLAEAMSQTTSAAVMMQKSNFKGQRRIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGRIWPSHKGRPGNFLQSRPEPSAPPEESFRFGEETTTPPQKQEPIDKELYPLASLKSLFGNDQ*SQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATGTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACATCAGAAGGCTGTAGACAAATACTGGAACAGCTACAACCATCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGATACCAAAGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGGCACAGGAAACAGCAACCAGGCCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGGCAAATGGTACATCAACCCCTATCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAGAAGGGTTTTAACCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACAACTAGTAACCTTCAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGAAATCTATAAAAGATGGATCATCCTAGGATTAACTACACTAGTAGGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGACCTTTTTAGAGACTATGTAGACCGGTTCTTTAAAACCCTAAGACTTGAGCAATGTACACAGGAAGTAAAAGGTTGGATGACAGACACCTTGTTGGTTCCAAATGCGAACCCCGATTGTAAGACCTTTTTAAAAGCTTTGGGCCCAGGGCCTTCACTTGAAGAACTGACGACCCCTGGTCCGGGAGTGGGAGGACCTAGCCATAAGGCAAGAGTTTTGGCTGAGGCAATGAGCCAAACAACAAGTGCAGCTGTAATGATGCAGAAAAGTAACTTTAAGGGCCAAAGAAGAATTGTTAAATGTTTTAATTGTGGCAAAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAA -OQ092465,pol,2150,5161,forward,0.1733067729083666,0,FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTTTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAAGACAGTTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAGACTCCTTAAGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGACAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAGCTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTTTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACCACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAAGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGTGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATACCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATACACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,1492,4503,FFRENLAFPQGKAREFPSEQARAISPTRRELQVWGGDNNSPSEAGADRQGTVSFGFPQITLWQRPIVTVKIEGQPKEALLDTGADDTVLEDINLPGKWKPKMIGGIGGFIKVKQYDNISIDICGHKATGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRTKVEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQCQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKIPKFKLPIQKETWETWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVPLTDTTNQKTELQAIHLALQDSGVEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEEHEKYHSNWRSMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFISNTVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAAAGATAGAGGGACAGCCAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATAACATATCCATAGACATTTGTGGACACAAGGCTACAGGTACAGTATTAGTAGGGCCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAATAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAGCTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGAATTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGCAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTAAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTAGGAAAATTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAATGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACAAATGATGTAAAACAACTAACAGAAGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGATTCCTAAATTTAAATTGCCCATACAGAAAGAGACATGGGAAACATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTTCCCCTGACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATCCATTTAGCTTTGCAGGATTCGGGAGTAGAGGTAAACATAGTAACAGACTCCCAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTATTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGAAAGATACTATTTTTAGATGGAATAGATAAGGCCCAAGAGGAACATGAGAAATATCACAGTAATTGGAGATCAATGGCTAGTGATTTTAACCTGCCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCATCAGTAACACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAGGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTGCAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGAGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092465,vif,5106,5684,forward,0.3471502590673574,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGATTTATAAGCATCACTATGACAGTATTAATCCAAAAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCAAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCGACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGGTCTCTACAGTACTTGGCACTAACAGCACTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG,4448,5026,MENRWQVMIVWQVDRMRIRTWNSLVKHHVYVSRKARNWVYKHHYESTHPRISSEVHIPLGDAKLVVITYWGLHTGERDWHLGQGVSIEWRKRRYSTQIDPDLADQLIHLYYFDCFSESAIRNAILGRIVRPRCDYQAGHNKVGSLQYLALTALVTPKKIKPPLPSVRKLTEDRWNKPQQIKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATGTGTATGTTTCAAGGAAAGCTAGGAATTGGGTTTATAAACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTAGTAGTAATAACTTATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAATAGACCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGACCTAGGTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG -OQ092465,vpr,5624,5914,forward,0.19587628865979378,0,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTACACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCCTTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,4966,5256,MEQAPADQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHYRIGCQHSRIGITRQRRARNGASRS*,ATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAACTTAAGAATGAAGCTGTTAGACATTTTCCTAGACCATGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTACAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092465,tat_exon1,5895,6109,forward,0.20833333333333326,0,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA,5237,5451,MEPVDPRLEPWKHPGSQPRTACTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRTPQDSQTHQVSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAACACCCAGGGAGTCAGCCTAGGACTGCCTGTACCAAATGCTACTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA -OQ092465,rev_exon1,6034,6109,forward,0.46153846153846145,0,MAGRSGDSDEELIKTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA,5376,5451,MAGRSGDSDEELLKIVRLIKFLYQNX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA -OQ092465,vpu,6126,6374,forward,0.36829268292682915,0,MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL,ATGCAACCTTTAGTAATATCAGCAATAGTAGCATTAGTAGTAGTAGCGATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGGGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG,5468,5707,MHSLQILAIVALVVVAIIAIVVWTIVLIEYRKILRQRRIDRIIERIRERAEDSGNESEGDQEELLVEMGHDAPWDVNDL*,ATGCACTCTTTACAAATATTAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGGCAAAGAAGAATAGACAGGATAATTGAGAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAA -OQ092465,env,6289,8880,forward,0.5747747747747753,0,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL,ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGCGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTGTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACTGGTGCCAATAATACCACTAGTACCAATACTACCACCCCTAGTACCACTGTTAGTAGCGAGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGACGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAAACATAATAGTACATCTGAATAAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGAGCAGGTATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGGTGAAGGGTCAAACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTACTATTAACAAGAGATGGTGGTAACGAGAGCGAGACCACCGACACTGAGACCTTCAGACCTGTAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGATTAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAAATCTGACACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCAGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA,5631,8207,MRAKGTRKNYWWKWGMMLLGMLMICNAAEQSWVTVYYGVPVWKEASTTLFCASDASAVDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNWNTTNKLNATDTNSSRIEEEMKEEMKNCSFNVTSSIGNKMQKEYALFYKLDVVPINNDSTSYTLINCNTSVITQACPKVSFEPIPIHYCTPAGFALLKCNDKKFNGTGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVIRSENFSNNAKTIIVQLNETVEITCERPNNNTRKGIHLGFGRALYATGEIIGDIRQAYCNLNRTKWENTLKRIVTKLGEQFKNQNKTITFDLSSGGDPEIMLHSFSCGGEFFYCNTTQLYNSNRKQENNGTWNNNGSNTNDNITLPCRIKQIINRWQEVGKAMYAPPITGAIRCLSNITGLLLTRDGGTRANLSETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGTIGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDEIWNNMTWMQWEKEIDNYTGVIYNLIEEAQNQQEKNEQELLQLDTWASLWNWFNITKWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPAQRGPDRPGGIEEEGGERDKDRSIRLVDGFLAIIWEDLRSLCLFSYHRLRDLLLIVTRVVELLGRRGWEALKYCWNLLQYWSQELRNSAVSLLNATAIAVAEGTDRIIEIVQIICRAILHIPRRIRQGLERALQ*,ATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAATGCTGCAGAACAATCGTGGGTCACAGTCTACTATGGGGTGCCTGTGTGGAAAGAAGCATCCACCACCTTATTTTGTGCATCAGATGCTAGCGCAGTTGACACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAGGAAATAGTATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGATCAAATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATTGGAATACTACTAATAAGTTGAATGCTACTGATACCAATAGTAGTAGAATAGAGGAAGAAATGAAGGAAGAAATGAAAAACTGCTCTTTCAATGTCACCTCAAGCATAGGAAATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAAATAATGATAGTACAAGCTATACACTGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAACCAATTCCCATACATTATTGTACCCCAGCTGGTTTTGCGCTTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGACCATGTACAAATGTCAGTACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAAATAGTGATTAGATCTGAAAATTTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATGAAACTGTAGAAATTACTTGTGAAAGACCCAACAACAATACAAGAAAAGGTATACATCTAGGATTTGGGAGAGCATTGTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCATATTGTAACCTTAATAGAACAAAATGGGAAAACACTTTAAAACGGATAGTTACAAAATTAGGAGAACAATTTAAAAATCAAAATAAAACAATAACCTTCGATCTCTCCTCAGGAGGGGACCCAGAAATTATGCTGCACAGTTTCAGTTGTGGAGGGGAATTCTTCTACTGTAATACAACACAGCTGTATAATAGTAATAGGAAGCAGGAGAATAATGGCACTTGGAATAATAATGGGAGTAATACTAATGATAATATCACACTCCCATGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCACAGGAGCAATTAGATGTTTATCAAATATTACAGGGCTGTTATTAACAAGAGATGGTGGAACGAGAGCGAATCTGTCCGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAACAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTAAAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTAGATGAAATTTGGAACAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGGCGTAATATACAATTTAATTGAAGAAGCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGCAATTGGATACGTGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTATATAAAGATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTCTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGAACAGATAGGATTATAGAAATAGTACAAATAATTTGTAGAGCTATTCTCCACATACCTAGACGAATAAGACAGGGCTTAGAAAGAGCTTTGCAATAA -OQ092465,tat_exon2,8462,8554,forward,0.6875,0,RPTSQPRGDPTGQKESKEKVERETETDPDH,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG,7789,7881,RPASQPRGDPTGPEESKKKVERETRTDP*D*,AGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAG -OQ092465,rev_exon2,8463,8738,forward,0.4301075268817205,0,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG,7790,8065,DPPPSPEGTRQARRNRRRRWRERQGQIHKISRWILSNHLGRPTEPVPLQLPPLERLTLDCDEGCGTSGTQGVGSPQILLESPAVLESGTKE*,GACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAG -OQ092465,nef,8882,9514,forward,0.3285714285714285,0,MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTAAAATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAACCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA,8209,8841,MGGKWSKCSMGGWTAVRERMRRTEPATEPAADGVGAVSRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQEILDLWVYNTQGYFPDWQNYTKGPGIRYPLTFGWCFKLVPVDPEQVEKANEGENNSLLHPMSQHGMDDPEKEVLMWKFDSRLAFRHVARELHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAATGTAGCATGGGTGGGTGGACTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAACTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAAACATGGAGCAATTACAAGTAGCAATACAGCAGCTACTAATGCTGACTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTGGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGAGATCCTTGATCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACAAAAGGGCCAGGGATCAGATATCCACTGACCTTCGGATGGTGCTTCAAGCTAGTGCCAGTTGATCCAGAACAGGTAGAAAAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCGCCACGTAGCCAGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA -OQ092462,gag,767,2269,forward,0.2870775347912524,0,MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAGCTAAAACATATAGTATGGGCAAGCAGGGAACTAGAGCGATTTGCAGTTAATCCCGGCCTGTTAGAAACATCGGAGGGCTGTAGACAAATACTAGGGCAACTACAGCCCGCTCTTCAGACAGGATCAGAAGAACTTAAATCACTATTTCATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTGAAAGACACCAAAGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAAGAAAAGTAAGAAAAAGGCACAGCAAGCAGCCGCTGACACAGGAAATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGACAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGATAAGGCTTTCAGTCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCAGGCAGGGCCTGTTGCGCCAGGCCAGCTACGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAGCAAATAGCATGGATGACACATAATCCACCTGTCCCAGTAGGAGAAATCTATAAAAGATGGATACTTCTGGGATTAAATAAAATAGTAAGAATGTACAGCCCCGTCAGCATTCTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCTGAGCAGGCTTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAGCAAATCCAGCTAGCATAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGCCTATTAAGTGTTTCAACTGTGGCAAAGAGGGGCATATTGCTAAAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092462,pol,2062,5073,forward,0.1494023904382471,0,FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTTTTAGAAGAAATGAATTTGCCAGGAAAATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGCTGCACTTTAAATTTTCCCATTAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAGAAAGATGGTAATAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCTGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGACTTCAGGAAGTATACTGCATTTACAATACCTAGCACAAACAATGAGACACCAGGGATTAGATACCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTGGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTATATGTAGGGTCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAACTGAGACAACATCTATTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCCGATAAATGGACAGTACAGCCTATATTGCTGCCAGAAAAAGACAGCTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATCAAAGTAAGGCAGCTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTGGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAACTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATATAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGAGGTACCCACACTAATGATATAAAACAATTAACAGAGGCAGTGCAAAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAATTACCCATACAAAAGGAAACATGGGAAGCATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGTCAATACCCCTCCCTTAGTGAAACTATGGTACCAGTTGGAAAAAGAACCCATAGTGGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTTACTGACAAAGGAAGACAAAAAGTTGTCCCCCTAACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAACTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAGTTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAGTTGTGACAAATGTCAGTTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCTGGAATATGGCAGCTAGATTGTACACATCTAAAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTGAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAACAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCAGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCGTCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACCAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATCAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092462,vif,5018,5596,forward,0.34375,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAGAGTTTAGTAAAACATCATATGCATATATCAAGGAAAGCTAAGAATTGGTTGTATAGACATCACTATGAAAGCATTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAGTAATAACAACATATTGGGGTCTGCTTACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAGGAGATATAGAACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCTTATTAGGACGTGTAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACCAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -OQ092462,vpr,5536,5826,forward,0.21875,0,MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAACTTAAAAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTTCATGGATTGGGACAGCATATCTATGAAACATATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCGACATAGCAGAATAGGCATTAATCTACAGAGGAGAGCAAGGAATGGATCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092462,tat_exon1,5807,6021,forward,0.375,0,MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKX,ATGGATCCAGTAGATCCTAGCCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGTTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092462,rev_exon1,5946,6021,forward,0.46153846153846145,0,MAGRSGDSDEELLKAVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092462,vpu,6038,6283,forward,0.7471264367816093,0,MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL,ATGCGACCTTTAGAAATAGCAGCAATAGTAGCACTAGTAGTAGCAGTACTAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAGTGGATAGAATAAGAGAAAGAGCAGAAGATAGTGGAAATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -OQ092462,env,6201,8777,forward,0.6409745293466222,33,MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL,ATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAGTGCCACAGAGAACTTGTGGGTCACAGTCTACTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAGATGCCAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAGCCCACAAGAAGTAGTATTGAAAAATGTGACAGAAAAGTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATCAGGATATAATCAGTTTATGGGATGAAAGCCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATGCTACTATCAGTGGTAATGCAACAGAAGAAATAAAAAACTGCTCTTTCAATGTCAATACAAAAATAGGAGGTAAGAAGCAGAAAGAACGTGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAGTACTAATAGGACTAATACCAGCTATAGGTTGATAAGTTGTAACACTTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTGGAGATAAAGAGTTCAATGGAACAGGACTATGTAGAAATGTCAGCACAGTCCAATGTACACATGGAATCAGGCCAGTAGTATCAACTCAATTGCTGTTGAATGGCAGTCTAGCAGAAGGAGAGGTAGTAATTAAATCTGAAAATATCACGAACAATGTTAAAACCATAATAGTACAGCTAAATGAAACTGTATCAATTAATTGTACAAGACCTAGCAACAATACAAGAAGAAGCATACATATGGGACCAGGGAGAGCCTTTTATGCAACAGGAGAAATAATAGGAGATATAAGGAAAGCACAGTGTATCCTGAATAAGACAGACTGGAGTGACACTTTAACAAGGATAGCTAAAAAATTACACAAGCAATTTCATGGACCAATAGCATTTGAGCAATCCTCAGGAGGGGACCCTGAAATTACAATGCACACTTTTAATTGTGGAGGGGAATTTTTCTACTGCAACACATCAGCGTTGTTTAGCGGGACCTGGAATGGTACTGCTTGGACTAATGCTACTTGGGGTAATATTGCAGGTAACAATATCACACTCCAATGCAGAATAAAACAATTTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGAGAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGCAGTAACACAACAAATGGTGGCGAGAATGGGACCCAGATTGGCGAGAATGTGACCCAGATCTTCAGACCTGGAGGAGGGGATATGAGGGACAATTGGAGAAGTGAATTATACAAATATAAAGTAGTAAAAATTGAGCCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTAACATTCGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACACTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTTCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAAAAAATTTGGGGGAACATGACCTGGATGGAGTGGGAGAGAGAAATTGACAATTATACAGACTTAATATACACCTTAATTGAACAATCGCAGAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAGGCTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGTTTAGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTATCATTCCAGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGTTTGCTTAATGCTATAGCTATATCAGTAGCGGAGGGAACAGATAGGATTATAGAAGCAATACAAAGAATTTGTAGAGCTATCTTACACATACCTACAAGGATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -OQ092462,tat_exon2,8359,8451,forward,0.40625,0,RPASQPRGDPTGPKEPTKKVERETETDPDH,AGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -OQ092462,rev_exon2,8360,8635,forward,0.3913043478260869,0,DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE,GACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092462,nef,8779,9405,forward,0.6323809523809525,0,MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC,ATGGGTGGCAAGGGGTCAAAAATGAGGGGATGGGTTGCTGTAAGGGAAAAAATGAGGCGAACTAAGCCAGAAGATGAGCCAGCAGCAAATGGGGTGGGGGCAGCATCTCGAGACTTGGAGAAATATGGCGCACTCACAAGTAGCAATACAGTAGCTACTAATGCTGATTTAGCTTGGCTAGAAGCACAAGAGGAAGAGGAGGTGGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTCAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGGTTCACCACACACAAGGCTATCTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATCAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATAGAGGACCCGGAGAAAGAAGTCTTAATGTGGAAGTTTGACAGCCACCTAGCATTCCGTCACATGGCCCGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -OQ092464,gag,773,2275,forward,0.4019960079840321,0,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAGCCAGCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCGCTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTAAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCTCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,200,1699,MGARASVLSGGELDRWEKIRLRPGGKKKYRLKHVVWASRELERFAVNPGLLETSEGCRQILEQLQPSLKTGSEELKSLFNTVAVLYCVHQKIEVKDTKEALDKIEEEQNKSKKKAQQAAAGTGNSNQASQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKGFNPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSNLQEQIQWMTSNPPVPVGEIYKRWIILGLTTLVGMYSPVSILDIKQGPKDLFRDYVDRFFKTLRLEQCTQEVKGWMTDTLLVPNANPDCKTFLKALGPGPSLEELTTPGPGVGGPSHKARVLAEAMSQTTSAAVMMQKSNFKGQRRIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGRIWPSHKGRPGNFLQSRPEPSAPPEESFRFGEETTTPPQKQEPIDKELYPLASLKSLFGNDQ*SQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATGTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACATCAGAAGGCTGTAGACAAATACTGGAACAGCTACAACCATCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGATACCAAAGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGGCACAGGAAACAGCAACCAGGCCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGGCAAATGGTACATCAACCCCTATCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAGAAGGGTTTTAACCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACAACTAGTAACCTTCAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGAAATCTATAAAAGATGGATCATCCTAGGATTAACTACACTAGTAGGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGACCTTTTTAGAGACTATGTAGACCGGTTCTTTAAAACCCTAAGACTTGAGCAATGTACACAGGAAGTAAAAGGTTGGATGACAGACACCTTGTTGGTTCCAAATGCGAACCCCGATTGTAAGACCTTTTTAAAAGCTTTGGGCCCAGGGCCTTCACTTGAAGAACTGACGACCCCTGGTCCGGGAGTGGGAGGACCTAGCCATAAGGCAAGAGTTTTGGCTGAGGCAATGAGCCAAACAACAAGTGCAGCTGTAATGATGCAGAAAAGTAACTTTAAGGGCCAAAGAAGAATTGTTAAATGTTTTAATTGTGGCAAAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAA -OQ092464,pol,2068,5079,forward,0.1733067729083666,0,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAATACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAGAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGTAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAGACTACCCATACAAAGAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGACGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAGAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,1492,4503,FFRENLAFPQGKAREFPSEQARAISPTRRELQVWGGDNNSPSEAGADRQGTVSFGFPQITLWQRPIVTVKIEGQPKEALLDTGADDTVLEDINLPGKWKPKMIGGIGGFIKVKQYDNISIDICGHKATGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRTKVEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQCQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKIPKFKLPIQKETWETWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVPLTDTTNQKTELQAIHLALQDSGVEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEEHEKYHSNWRSMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFISNTVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAAAGATAGAGGGACAGCCAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATAACATATCCATAGACATTTGTGGACACAAGGCTACAGGTACAGTATTAGTAGGGCCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAATAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAGCTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGAATTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGCAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTAAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTAGGAAAATTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAATGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACAAATGATGTAAAACAACTAACAGAAGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGATTCCTAAATTTAAATTGCCCATACAGAAAGAGACATGGGAAACATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTTCCCCTGACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATCCATTTAGCTTTGCAGGATTCGGGAGTAGAGGTAAACATAGTAACAGACTCCCAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTATTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGAAAGATACTATTTTTAGATGGAATAGATAAGGCCCAAGAGGAACATGAGAAATATCACAGTAATTGGAGATCAATGGCTAGTGATTTTAACCTGCCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCATCAGTAACACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAGGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTGCAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGAGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092464,vif,5024,5602,forward,0.30051813471502586,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTACTCATCCAAGAATAAGTTCAGAAGTTCACATCCCGCTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAGAAGAGATATAGCACACAAGTAGACCCTGGCTTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGCTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG,4448,5026,MENRWQVMIVWQVDRMRIRTWNSLVKHHVYVSRKARNWVYKHHYESTHPRISSEVHIPLGDAKLVVITYWGLHTGERDWHLGQGVSIEWRKRRYSTQIDPDLADQLIHLYYFDCFSESAIRNAILGRIVRPRCDYQAGHNKVGSLQYLALTALVTPKKIKPPLPSVRKLTEDRWNKPQQIKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATGTGTATGTTTCAAGGAAAGCTAGGAATTGGGTTTATAAACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTAGTAGTAATAACTTATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAATAGACCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGACCTAGGTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG -OQ092464,vpr,5542,5832,forward,0.2268041237113403,0,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,4966,5256,MEQAPADQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHYRIGCQHSRIGITRQRRARNGASRS*,ATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAACTTAAGAATGAAGCTGTTAGACATTTTCCTAGACCATGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTACAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092464,tat_exon1,5813,6027,forward,0.20833333333333326,0,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA,5237,5451,MEPVDPRLEPWKHPGSQPRTACTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRTPQDSQTHQVSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAACACCCAGGGAGTCAGCCTAGGACTGCCTGTACCAAATGCTACTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA -OQ092464,rev_exon1,5952,6027,forward,0.46153846153846145,0,MAGRSGDSDEELLQTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA,5376,5451,MAGRSGDSDEELLKIVRLIKFLYQNX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA -OQ092464,vpu,6044,6289,forward,0.4841463414634146,0,MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL,ATGCAACCTTTACACATAGCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAG,5468,5707,MHSLQILAIVALVVVAIIAIVVWTIVLIEYRKILRQRRIDRIIERIRERAEDSGNESEGDQEELLVEMGHDAPWDVNDL*,ATGCACTCTTTACAAATATTAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGGCAAAGAAGAATAGACAGGATAATTGAGAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAA -OQ092464,env,6207,8789,forward,0.546485260770976,0,MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL,ATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATAATACCACTAGTACCAATGATACCACTAGTACCACTACTAGTAGCGGGGAAAAGATGAAGGAAGGAGAGATAAAAAACTGCTCTTTCAATATCACCACAAGCATAAGAGATAAGGTGCAGAAAGAATATGCACTCTTTTATAAACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACGATAAGAAGTTCAATGGAACAGGATCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAATGAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAAGACAATAATCTTTACTCACTCCTCAGGAGGGGACCCAGAAGTTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGATGAAAGGTCAAATGACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGATACATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAACGAGAGCAACACCACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAAAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGGATAACATGACCTGGATGGAATGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACTTTAATTGAAAAATCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA,5631,8207,MRAKGTRKNYWWKWGMMLLGMLMICNAAEQSWVTVYYGVPVWKEASTTLFCASDASAVDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNWNTTNKLNATDTNSSRIEEEMKEEMKNCSFNVTSSIGNKMQKEYALFYKLDVVPINNDSTSYTLINCNTSVITQACPKVSFEPIPIHYCTPAGFALLKCNDKKFNGTGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVIRSENFSNNAKTIIVQLNETVEITCERPNNNTRKGIHLGFGRALYATGEIIGDIRQAYCNLNRTKWENTLKRIVTKLGEQFKNQNKTITFDLSSGGDPEIMLHSFSCGGEFFYCNTTQLYNSNRKQENNGTWNNNGSNTNDNITLPCRIKQIINRWQEVGKAMYAPPITGAIRCLSNITGLLLTRDGGTRANLSETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGTIGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDEIWNNMTWMQWEKEIDNYTGVIYNLIEEAQNQQEKNEQELLQLDTWASLWNWFNITKWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPAQRGPDRPGGIEEEGGERDKDRSIRLVDGFLAIIWEDLRSLCLFSYHRLRDLLLIVTRVVELLGRRGWEALKYCWNLLQYWSQELRNSAVSLLNATAIAVAEGTDRIIEIVQIICRAILHIPRRIRQGLERALQ*,ATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAATGCTGCAGAACAATCGTGGGTCACAGTCTACTATGGGGTGCCTGTGTGGAAAGAAGCATCCACCACCTTATTTTGTGCATCAGATGCTAGCGCAGTTGACACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAGGAAATAGTATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGATCAAATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATTGGAATACTACTAATAAGTTGAATGCTACTGATACCAATAGTAGTAGAATAGAGGAAGAAATGAAGGAAGAAATGAAAAACTGCTCTTTCAATGTCACCTCAAGCATAGGAAATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAAATAATGATAGTACAAGCTATACACTGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAACCAATTCCCATACATTATTGTACCCCAGCTGGTTTTGCGCTTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGACCATGTACAAATGTCAGTACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAAATAGTGATTAGATCTGAAAATTTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATGAAACTGTAGAAATTACTTGTGAAAGACCCAACAACAATACAAGAAAAGGTATACATCTAGGATTTGGGAGAGCATTGTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCATATTGTAACCTTAATAGAACAAAATGGGAAAACACTTTAAAACGGATAGTTACAAAATTAGGAGAACAATTTAAAAATCAAAATAAAACAATAACCTTCGATCTCTCCTCAGGAGGGGACCCAGAAATTATGCTGCACAGTTTCAGTTGTGGAGGGGAATTCTTCTACTGTAATACAACACAGCTGTATAATAGTAATAGGAAGCAGGAGAATAATGGCACTTGGAATAATAATGGGAGTAATACTAATGATAATATCACACTCCCATGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCACAGGAGCAATTAGATGTTTATCAAATATTACAGGGCTGTTATTAACAAGAGATGGTGGAACGAGAGCGAATCTGTCCGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAACAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTAAAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTAGATGAAATTTGGAACAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGGCGTAATATACAATTTAATTGAAGAAGCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGCAATTGGATACGTGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTATATAAAGATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTCTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGAACAGATAGGATTATAGAAATAGTACAAATAATTTGTAGAGCTATTCTCCACATACCTAGACGAATAAGACAGGGCTTAGAAAGAGCTTTGCAATAA -OQ092464,tat_exon2,8371,8463,forward,0.59375,0,RPTSQPRGDPTGQKESKKKVERETETDPDH,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG,7789,7881,RPASQPRGDPTGPEESKKKVERETRTDP*D*,AGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAG -OQ092464,rev_exon2,8372,8647,forward,0.3978494623655915,0,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG,7790,8065,DPPPSPEGTRQARRNRRRRWRERQGQIHKISRWILSNHLGRPTEPVPLQLPPLERLTLDCDEGCGTSGTQGVGSPQILLESPAVLESGTKE*,GACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAG -OQ092464,nef,8791,9411,forward,0.3542857142857141,0,MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,ATGGGTGGTAAGTGGTCAAAGAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGATGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGGTATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGGTTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA,8209,8841,MGGKWSKCSMGGWTAVRERMRRTEPATEPAADGVGAVSRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQEILDLWVYNTQGYFPDWQNYTKGPGIRYPLTFGWCFKLVPVDPEQVEKANEGENNSLLHPMSQHGMDDPEKEVLMWKFDSRLAFRHVARELHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAATGTAGCATGGGTGGGTGGACTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAACTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAAACATGGAGCAATTACAAGTAGCAATACAGCAGCTACTAATGCTGACTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTGGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGAGATCCTTGATCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACAAAAGGGCCAGGGATCAGATATCCACTGACCTTCGGATGGTGCTTCAAGCTAGTGCCAGTTGATCCAGAACAGGTAGAAAAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCGCCACGTAGCCAGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA -OQ092467,gag,808,2307,forward,0.2507968127490039,0,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAAAAACAATACAAATTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTCTAGAGACATCAGAAGGGTGTAGACAAATACTGGGACAGCTACAACCAGCTCTTCAGACAGGATCAGAAGAATTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAAATAGAGGAAGAGCAAAACCAAAGTAAGAAAAAAGCAGCAGCTGCAGCAGCTGACACAGGAAACAGAAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCTTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAATAGGTGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCAGGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATTCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTGAGAGCCGAGCAAGCAACACAGGAAGTAAAGAATTGGATGACAGAAACTTTGCTGGTCCAAAATGCAAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCAGGCCACAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAACTGTAATGATGCAGAGAGGCAATTTTAGGAATCAAAGAAAGACAGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCATATAGCAAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAAGAAGGGCACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092467,pol,2100,5111,forward,0.14342629482071723,0,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAAATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGATATGAATTTACCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCCTATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGGATATTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTGGGATCTGACTTAGAAATAGGACAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGATAGCTGGACTGTCAATGACATCCAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACTAAAGCACTAACAGAAGTAGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCAACGAAAGACCTAATAGCAGAACTACAGAAGCAGGGGCAAAGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTGAAACAATTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAAATTACCTATACAAAAAGAAACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGATTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAGATCAGGCAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCCCCCTAACAGACACAACAAACCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCACTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGGAAGATACTATTTTTAGATGGAATAACTAAGGCCCAAGATGATCATGAGAGATACCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCACGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAACTAGATTGTACGCATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTAAAACTAGCAGGAAGATGGCCAGTAAAGACAGTACATACAGATAATGGCAGCAATTTCACCAGTGCTGCGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAAAGTCAAGGAGTCATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAGAAACAAATCACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAGGCAAAGATCATTAGAGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092467,vif,5056,5634,forward,0.44041450777202074,0,MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATACATGTCTCAAAGAAAGCTAAGGGATGGGTTTATAGACACCACTATGAAAGCACCCATCCAAGAATAAGTTCAGAAGTACATATCCCGCTAGGGGAAGCTAGATTAGTAATAGCAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAAGAAATATATCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGCATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCATAGTAGGACGTTTAGTTAGCCCTCAGTGTGAATATCAAACAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGTAGCATTAATAACGCCAAAAAAGAGAAAGCCACCTTTGCCTAGTGTTAGGAAATTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -OQ092467,vpr,5574,5864,forward,0.28125,0,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAGAGCTTTTAGAGGAACTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAACTTAGGACAATACATCTATGCAACTTATGGGGATACTTGGACAGGAGTGGAAGCTTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGGAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092467,tat_exon1,5845,6059,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092467,rev_exon1,5984,6059,forward,0.46153846153846145,0,MAGRSGDSDEDLLKTVRLIKQLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -OQ092467,vpu,6076,6339,forward,0.6090909090909091,0,MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL,ATGCAACCCTTAGTAATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAGAAAGATAGATAGCATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -OQ092467,env,6239,8806,forward,0.5505630630630636,0,MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL,ATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAGTGCTAAAGAACAATTGTGGGTCACAGTTTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAAATGCTAAAGCATATGACCCAGAGGTGCATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAAGAAGTACCATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGACATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTATTTTAAATTGCACTAATGTGAATGTTACTACTAACAATAATAGTAGTAGTGAGGAACAGATGGAGGTAGGAGAAATAAAAAACTGCTCTTTCAATATTGCCACAAGAATAAAAAATAAGATAAAGAAAGAATATGCACTTTTTAATAGACTTGATGTAGTACCAATAGAGGATGATAATACAAGCTATATGTTGATAAATTGTAATACCTCAGTCACTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATACTAAAATGTAATGATAAAAAGTTCAATGGAACAGGACCATGTAACAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGATAGTAGTTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTGAATAAAACTATAGAAATTAATTGTATAAGACCCAACAATAATACAAGAAAAAGTATATCTTTAAGACCGGGGCAAGCAATTTATGCAACAGAAGACATAATAGGAAATATAAGACAAGCACATTGTAACATTAGGAGAAAAGACTGGGATAAAGCTTTAGAACAGGTAGTTGCAAAATTAAGAGAACAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGAGACCCAGAAATTGTAATGCATAGTTTTAATTGTGCAGGGGAATTTTTCTACTGTAACACAACAAAGCTGTTTAATAGTACTTGGAATGTTAATAACACTCGGAATAATACTACTGATAATAGCACCATCACTCTCCCGTGCAGAATAAAACAAATTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCTCCTCCCATCAAAGGGCAAATTAAATGTTCATCAAATATTACAGGGTTATTATTAACAAGAGATGGTGGTGTCCGCGAGGACAACGCCCCTGAGATCTTTAGACCTGGAGGAGGAGATATGAGGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTGGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAAAGAGGAAAAAGAGCAGTAACGCTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACTTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTACCTTGGAATACTAGTTGGAGTAATAAATCTTTGGAAAAGATTTGGAAAAACATGACCTGGATGGAGTGGGAGAAAGAAATTAACAATTACACAAGGACAATATACACCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAGGAATTATTGGAATTGGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATCATGATAGTAGGAGGTATAGTAGGGTTAAGAATAGTTTTTACTGTGCTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTGTCATTCCAGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAATTGGTAGAGCTATTCTCCACATCCCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -OQ092467,tat_exon2,8388,8480,forward,0.5806451612903225,0,RPASQLRGDQTGPKEQKKKVERETETDPGN,AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -OQ092467,rev_exon2,8389,8664,forward,0.3913043478260869,0,DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC,GACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092467,nef,8808,9416,forward,0.583333333333333,0,MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC,ATGGGTGGCAAGTGGTCAAAATGTAGTCTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCTCCAGCAGCAGAAGGGGTGGGAGCAGCATCTCGAGACTTGGAAAAACATGGAGCACTCACAACTAGTAATACAGCAGCTAATAATGCTGCTTGTGCCTGGCTGGAAGCACAAGAGGAGGAAGAGGTGGGGTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTATGGATCTTGGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCCAAAAAGACAAGAAATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTATACACCAGGGCCAGGGACTAGATATCCATTAACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATGAAGGAGAGAATAGCTGTTTGCTACACCCGATGAACCAGCATGGGGCAGATGACACAGAAAGAGAAGTATTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAAGGCCCGAGAGCTGCATCCGGAGTACTACAAAAACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA diff --git a/tests/expected-results-large-csv/subtypes.fasta b/tests/expected-results-large-csv/subtypes.fasta index 94c83ea..569efb5 100644 --- a/tests/expected-results-large-csv/subtypes.fasta +++ b/tests/expected-results-large-csv/subtypes.fasta @@ -161,454 +161,3 @@ CTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGG TCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTG CTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGT GACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCA ->Ref.47_BF.ES.08.P1942.GQ372987 -AGGGTTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCATTTAGTCAG -TGTGGAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCCGAAAGCGAAAGTAGAACCAG -AGAAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCTCGGCAAGAGGCGAGGGGC -GGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGT -GCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGG -CCAGGAGGAAAGAAAACATATAAATTGAAACATATAGTATGGGCAAGCAGGGAGCTAGAA -CGATTCGCTCTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGA -CAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATTTAATGCAGTA -GCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGAT -AAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCTGACACAGGA -AATAACAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACGCCCAGGGGCACATGATA -CATCAGCCTATTTCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAAAAGGCT -TTTAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGATGCACCCCACAAGAT -TTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGCTAAAAGAC -ACCATTAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATC -CCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACTACCAGTACCCTT -CAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGATATCTATAAA -AGATGGATCATCCTAGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTG -GACATAAAACAAGGGCCAAAAGAACCCTTTAGAGATTATGTGGATAGGTTCTTTAAAGTC -CTAAGAGCCGAGCAAGCTACACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTC -CAAAATTCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCACAGGCTACACTA -GAAGAAATGATGACAGCATGCCAAGGAGTGGGAGGGCCCGGCCATAAAGCAAGAGTTTTG -GCAGAAGCAATGAGCCAAGCAACAGCTTCAAATGTCATAATGATGCAGAAAGGCAATTAT -AGGGGCCAGAGAAAGATTGTCAAGTGTTTCAATTGTGGCAGAGAAGGACACATAGCCAAA -AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAGTGTGGAAAAGAAGGACACCAAATG -AAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAAAATTTGGCCTTCCAGCAAGGGG -AGGCCAGGGAATTTTCTCCAGAACAGGCCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTG -GGGTTTGGAGAGGAGACAACCCCCTCTCCGAAGCAGGAACAGAAAGAGGGACTGTATCCT -CCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTAGTCACAGTAAAAGTAGGGG -GACAGATGAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAA -ATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAA -GACAGTATGATCAAATAATCATAGAAATTTGTGGAAAGAAAGCTATAGGTACAGTATTAG -TAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTT -TAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTGAAGCCAGGAATGGATG -GCCCAAAGGTCAAACAATGGCCATTAACGGAAGAAAAAATAAAAGCATTAATAGAAATCT -GTACAGAAATGGAAAAGGAAGGGAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATA -CTCCAGTATTTGCCATAAAGAAAAAAGACAGTACAAAATGGAGAAAATTAGTAGATTTTA -GAGAACTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCGCATCCTG -CAGGATTAAAAAAGAAAAAATCAGTAACAGTATTAGATGTGGGAGATGCATATTTTTCAG -TTCCCTTATATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATG -AGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAG -CAATATTTCAAAGCAGCATGACGAAAATCTTAGAGCCTTTTAGAAAACAGAATCCAGACA -TAGTGATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGC -ATAGAACAAAAATAGAGGAACTGAGACAACATCTACTGAAATGGGGGCTTACTACACCAG -ACAAAAAACATCAGAAAGAACCTCCCTTCCTTTGGATGGGTTATGAACTACATCCTGATA -AATGGACAGTACAGCCTATAGTGCTGCAAGAAAAGGACAACTGGACTGTCAATGACATAC -AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGAC -AATTATGCAGACTCCTTAGGGGAACCAAGGCACTAACAGACATAGTACCACTAACAAAAG -AGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGGGTGT -ATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGA -CATATCAAATCTATCAGGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGGTCAA -GGGGTGCCCACACTAATGATGTAAGACAGTTAACAGAGGCAGTGCAAAAAATAACCACAG -AAAGCATAGTAATATGGGGGAAAACTCCTAAATTTAGACTACCCATACAAAGAGAGACAT -GGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCA -ATACCCCCCCTCTAGTAAAGTTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAG -AAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATG -TTACTAATAAAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACCAATCAGAAGACTG -AGTTACAAGCAATCCTTCTAGCTTTACAGGATTCAGGATTAGAAGTAAACATAGTAACAG -ACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAATTAG -TCAATCAAATAATAGAGCAATTAATAAACAAGGAAAAGGTCTACCTGTCATGGGTACCAG -CACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGA -AAGTACTATTTTTAGATGGGATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAATA -ATTGGAGGGCAATGGCCAGTGATTTTAACATCCCACCTGTGGTAGCAAAAGAGATAGTAG -CCAGCTGTGATAAATGTCAGCAAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTC -CAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTC -ATGTGGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAG -CATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATG -GCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGG -AATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAGGAAT -TAAAGAAAATTATAGGACAGATAAGAGATCAGGCTGAACATCTTAAGACAGCAGTGCAAA -TGGCAGTATTCATCCACAATTTTAAAAGAAGAGGGGGGATTGGGGGGTACAGTGCAGGGG -AAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAAAAACAAATTA -CAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCAGTTTGGAAAGGAC -CAGCAAAACTACTCTGGAAAGGTGAAGGGGCAGTGGTCATACAAGACAATAGTGAAATAA -AAGTAGTGCCAAGAAGAAAAGCAAAGATAATTAGGGATTATGGAAAACAGATGGCAGGTG -ATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGCAAATGGAAAAGTTTAGTTAAATAC -CATATACATACTTCAAAGAAAGCCAAAAAATGGTTCTATCGACATCACTTTGAAAGCAGG -CATCCAAAAATAAGCTCAAAAGTACACATCCCWCTAGAGGAAGAAAATAAATTAGTAGTA -ACAACATATTGGGGTCTGAATACAGGGGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCC -ATAGAATGGAGGCAGGGGAAGTATAGGACACAAATAGACCCTGGCCTGGCAGACCAACTG -ATTCATATATATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAAAGCCATATTAGGA -CATAGAGTTAGCCCTAGGTGTAACTATCAAGCAGGACATAACAAGGTAGGATCTCTACAA -TATTTGGCACTAACAGCATTAATAGCTACAAAGAAGGCAAAGCCGCCTTTGCCTAGTGTC -CAGAAATTAGTARRAGACAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGC -CATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAAAATGAAGCTGTCAGACATT -TTCCTAGGCCATGGCTCCATGGCTTAGGACAACATATCTACAACACCTATGGGGATACTT -GGGAGGGAGTTGAAGCTATAATAAGGATGCTGCAACATCTACTGTTTATCCATTTCAGAA -TTGGGTGCAATCATAGCAAAATAGGCATTATTCGACAGAGAAGAACAAGAAATGGAGCCA -GTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAGGACTGCGTGTACC -AAATGTTATTGTAAGAGATGTTGCTTTCATTGCCAAGTTTGCTTCATAACAAAAGGCTTA -GGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAARACCTCCTCAAAGCGGTCAG -ACTCATCAAGATTCTCTATCAAAGCAGTGAGTAGCATATGTAATGCAATCTTTAGAGATA -TTAGCAATAGTAGCACTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGTCTATAGTA -CTCATAGAATATAGGAAAATATTAAGAGAAAGAAAAGTATATAAATTAATTGACAGAATA -AGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGAGATCAAGAAGAATTATCAGCA -ATGGTGGAAAGGGGGCATCTTGCTCCTTGGGATATTAATGATCTGTAATGCTGAAAAGTC -TGAAAAGTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCAC -TCTATTTTGTGCATCAGATGCTAAGGCATATGATACAGAGGCACATAATGTTTGGGCCAC -ACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGTTATTGAAAAATGTGACAGA -AAATTTTAACATGTGGAAAAACAACATGGTAGATCAGATGCATGAAGATATAATTAGTTT -ATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACACCACTTTGTGTTACCTTAAATTG -CAATAATACTGTCACCACTAATGCTAGCATGAATAATAGTGGAGAAATGAAAAATTGCTC -TTTCAATATCACCACCCAAACGAGAGGGAGAAAGAGAGAATATGCACTTTTTTATAACCT -TGATGTAGTGCAATTAGAATCAGACAAAACTAGTACTAGCTATAGGTTGATAAGTTGTAA -CACCTCAGTCATTACACAGGCTTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTA -TTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGCAGTTCAATGGAACAGG -ACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAAC -TCAATTGCTGTTAAATGGCAGCCTAGCAGAAGAAGATATAATAATCAGGTCTCAAAATAT -CTCAGATAATGCAAAAAGCATAATAGTACAGTTGAATGAATCTATAAGCATTAATTGTAT -AAGACCCGGCAATAATACAAGAAAAAGTATACATATGGGACCAGGCAAGGTATTTTATGC -AACAGGAGATATAATAGGAAATATAAGACAAGCACATTGTAACATTAGTAAAGCAAAATG -GAATAACACTTTAAGACAGATAGCCAGAAAATTAGGAGAACAATTTAACAATAAAACAAT -AGTCTTTAATCAATCCTCAGGAGGGGACCCAGAAATTGTAATGCATACTTTTAACTGTGG -AGGGGAATTTTTCTACTGTAATACAACATCACTGTTTAATAGTACATGGGAGAATGATAC -AAATATTACTGAAGAATCAAATAGCTCAGATGACACAATCACACTCCAATGCAAAATAAA -ACAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGG -ATACATTAACTGTTCATCAAATATCACAGGGCTGATATTAGTAAGAGATGGTGGTAATAA -CAGAACAAGTGAGAGTGAGACCTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAG -AAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAA -GGCAAAGAGAAGAGTGGTGCAGAGACAAAAAAGAGCAGTGGGATTTGGAGCTTTGTTCCT -TGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGGCGCTGACGGTACA -GGCCAGACTATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGA -GGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGT -CCTGGCTGTGGAAAGATACCTAAAGGATCAACAGATCATGGGGATTTGGGGTTGCTCTGG -AAAATACATCTGCACCACTGCTGTGCCTTGGAATACTAGCTGGAGTAATAAATCTTATGA -TCAGATTTGGAAGAACATGACCTGGATGCAGTGGGAAAAAGAAATTGATAATTACACAAG -TGAAATATACAGCTTAATTGCACTATCGCAAGACCAGCAAGAAAAGAATGAACAAGAATT -ATTGGAATTGGACAAATGGGCAAGCTTGTGGAATTGGTTTGACATATCAAACTGGCTGTG -GTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGATTAAGAATAGTTTTTGC -AATACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTCCAGACCCA -CCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAG -AGACAGAGACAGATCCGGTCGATCGGCGAGCGGATTCTTAACACTTATCTGGATCGACCT -ACGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGCAGCGAG -GATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATACTGGTGGAACCTCCT -ACAGTATTGGAGTCAGGAACTAAAGAATAGTGCCATTAGCTTGCTTAATACCACAGCAAT -AGTAGTAGCTGAGGGGACAGATAGAATTATAGAAGCTTTGCAAAGTGCTGGTAGAGCTGT -TCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTACTATAAAATGGGAA -ACAAGTGGTCAAAAAGTAGTATAGTTGGATGGCCTACTATAAGGGAAAGAATAAGACGAA -CCCCTCCAATAGCAGAAGGGGTGGGAGCAGTCTCTCGAGACCTAGGAAAGCATGGAGCAA -TCACAAGTAGCAACACAGCAGCTAATAATCCTGACTTGGCCTGGCTGGAAGCACAGGAGG -GTGAGGAAGTAGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAGG -GAGCTTTCGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATT -CCAGGAAAAGACAAGAGATCCTTGATCTATGGGTTTACCACACACAAGGATACTTCCCTG -ATTGGCAGAACTACACACCAGGGCCAGGGGTCAGGTATCCATTGACCTTTGGGTGGTGCT -TCAAGCTAGTACCAGTTGACCCAGAGGAGGTAGAAAAGGCCAATGAAGGAGAAAACAACT -GCTTGCTACACCCCATGAGCCAACATGGAATGGAGGATGAAGACAGAGAAGTACTGATGT -GGAAGTTTGACAGACACCTAGCATCTAAGCACGTAGCCCGAGAGCTACATCCGGAGTATT -ACAAGGACTGCTGACATCGAGCTTTCTACAAGGGACTTTCCACTGGGGACTTTCCGAGGG -AGGTGTGGCCTGGGCGGGACAGGGGAGTGGCGAGCCCTCAGATTGC ->Ref.B.TH.90.BK132.AY173951 -GGACCTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGA -AGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAG -CGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAG -ATAGATGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATA -TAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTATTGGAAACAT -CAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAAGCCTTCAGACAGGATCAGAAG -AACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAGAGG -TAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAA -AGGCACAGCAAGCAGCAGCTAACACAGAAAACAGCAGCCAGGTTAGCCAAAATTACCCTA -TAGTGCAAAATATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATG -CATGGGTAAAAGTAGTAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAG -CATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGAC -ATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATA -GATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAA -GTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATC -CACCTATCCCAGTGGGAGAAATTTACAAAAGATGGATAATCCTGGGATTAAATAAAATAG -TAAGGATGTATAGCCCTACCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTA -GAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCCGAGCAAGCCTCACAGGAGGTAA -AAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTT -TAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGG -GAGGTCCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTGACAAATTCAG -CTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCA -ATTGTGGCAAAGAAGGGCACATAGCCCGAAATTGCAGGGCCCCTAGGAAGAAGGGCTGTT -GGAGATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTT -TAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAG -AGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTC -AGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTG -GCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATAC -AGGGGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAAT -GATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACTTGTAGAAAT -CTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGG -AAGAAATTTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCTATTAGTCCTATTGAAAC -TGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGAC -AGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAAT -TTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA -CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTT -CTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTGAAAAAGAAAAAATCAGTAAC -GGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATA -TACTGCATTTACCATACCGAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAA -TGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAAT -CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTT -GTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACA -ACATCTGTTGAGGTGGGGATTTACCACACCAGATAAAAAACATCAGAAAGAACCTCCATT -CCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAACCTATAGTGCTGCC -GGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC -AAGTCAGATTTACCCAGGGATTAAAGTAAAGCAGTTATGTAAACTCCTTAGGGGAACCAA -AGCACTAACAGAAGTAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAG -GGAAATTCTAAAAGAAACAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGC -AGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAA -AAATCTAAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA -ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCC -TAAATTCAAACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCA -AGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTA -CCAGTTAGAAAAAGAGCCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCGGCTAA -CAGGGAGACTAAATTAGGAAAAGCAGGATATGTGACTAACAGAGGAAGACAAAAAGTTGT -CTCCCTAACTGACACAACAAATCAAAAGACTGAGTTACAAGCAATTCACCTAGCTTTGCA -GGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCA -AGCACAACCAGATAAAAGTGAATCAGAGATAGTCAGTCAAATAATAGAGCAGTTAATACA -AAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAGGGAATTGGAGGAAATGAACA -AGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTCTAGATGGAATAGATAA -GGCCCAAGAAGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAA -CCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGG -AGAAGCCATGCATGGACAGGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACA -TTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGA -AGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAG -ATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAA -GGCTGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATCCCCTACAATCCCCAAAG -TCAAGGAGTAGTAGAATCTATAAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGA -TCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTTATCCACAATTTTAAAAG -AAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGA -CATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTA -CAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTGTGGAAAGGTGAAGG -GGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGAT -CATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGA -GGATTAAAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAGCTAAGG -GATGGTTGTATAGACATCACTATCAAAGCATTCATCCAAGAATAAGTTCAGAAGTACATA -TCCCACTAGGAGAGGCTAGCTTGGTAATAAAGACATATTGGGGTCTGCATACAGGAGAAA -GAGAATGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAGGAAGATATAACACAC -AAGTAGACCCAGGCCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAG -AATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAG -CAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAAGAACACCAA -AGAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGC -CCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGA -GGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGACATGGCTCCACGGATTAGGGCA -ATATATCTATGAAAATTATGGGGACACTTGGGCAGGAGTGGAGGCCATAATAAGAATTCT -GCAACAACTGCTGTTTATTCATTTCAGGATTGGGTGTCGACATAGCAGAATAGGCATTAC -TCTACAAAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATC -CAGGAAGTCAGCCTAAGACGGCTTGTAACACTTGCTATTGTAAAAAATGTTGCTTTCATT -GCCAAGTTTGTTTCACAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGAC -AGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG -TAGTATATGTAATGCACTCTTTACAAATATTAGGAATAGTAGCATTAGTAGTAGCAGGAA -TAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAA -GGAAAATAGACAGGTTAATTGATAGAATAATAGAAAGAGCAGAAGACAGTGGCAATGAGA -GTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGG -ATATTAATGATTAGTAGTGCTGCAGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCT -GTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACA -GAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAA -GTAGTATTGGTAAATGTGACAGAAAATTTTAAYATGTGGACAAATAACATGGCAGAACAG -ATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACT -CCACTCTGTGTTACTTTAAATTGCACTGATTTGAGAAATACTACTAATACCAATAGTACC -GCCGAGGAAATGGAGGCGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCACCACAAGC -ATAAGGAATAAGTTGCAGAAAGAATATGCACTCTTTTATAAACTTGATATAGTACCAATA -AATAATGATAATACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACCCAGGCC -TGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCG -ATTCTAAAGTGTAATGATAAGAAGTTCAGTGGAAACGGACCATGTAAAAATGTCAGCACA -GTACAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGCTAAATGGCAGT -CTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTTACAGACAATGCTAAAACCATA -ATAGTACAGCTGAAAGAACCTGTAGAAATTAATTGTACAAGACCTAACAACTATACAAGG -AAAAGAATAACTATGGGACCAGGGAGAGTATATTATACAACAGGAGAAATAATAGGAGAT -ATAAGACGAGCACATTGTAACATTAGTAGCACAAAATGGAATAACACTTTAGGACAGATA -GTTAAAAAATTAAAAGAACAATTTAACAATAATACAATAGTCTTTAAGAAATCCTCAGGA -GGGGACCCAGAAATTGTAATGCACAGTTTTATTTGTGGAGGGGAATTTTTCTTCTGTAAT -TCAACAAAACTGTTTAATAGTACTTGGAATAGCACTGAAGGAAATGACGATGGAGAGGAA -AGAAATATCACACTCCCATGCAGAATAAAACAAATTGTAAACATGTGGCAGGAAGTAGGA -AAAGCAATGTATGCCCCTCCCATCGGAGGACAAATTAGATGCACCTCAAATATTACAGGG -CTGCTATTAACAAGAGATGGAGGTAACCAAAATGGGACCAACGAGACTGAAATCTTCAGA -CCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAACTATATAAATATAAAGTAGTA -AAAATTGAACCATTAGGAGTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAA -AAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACT -ATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTG -CAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACA -GTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCTTAGCTGTGGAAAGATACCTAAAGGAT -CAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCT -TGGAATGCTAGTTGGAGTAATAAATCTCTGGATGAGATTTGGAATAACATGACCTGGATG -CAATGGGAAAGAGAAATTAACAATTACACAGGCTTAATATACACCTTAATTGAAGAATCG -CAAAACCAACAAGAAAAGAATGAACTAGATTTACTGCAATTAGATAAATGGGCAAGTTTG -TGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTA -GGAGGATTGGTAGGTTTAAGAATAATTTTTACTGTACTTTCTATAGTGAATAGGGTTAGG -CAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGG -CCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTG -GATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCAC -CGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGG -TGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAAT -AGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTT -ATAGAAGTATTACAAAGAGTTTATAGAGCTATTCTCAACATACCTACAAGAATCAGACAG -GGCTTGGAAAGGGCTTTGCTATAAGATGGGTAGCAAGTGGTCAAAAATGAGTGGGTGGCC -TGCTGTAAGGGAAAGAATGAGAAGAACTAAGCCAGCTGAGCCAGCAGCAGATGGAGTGGG -AGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTAC -CAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGGGGAGGTGGGTTTCCCAGT -CAAACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTTAGCCACTT -TTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAACAAAGACAAGATATCCT -TGATCTGTGGGTCTACCATACACAAGGCTACTTCCCTGATTGGCAGAATTACACACCAGG -GCCAGGGGTCAGATTCCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCC -AGACAAGGTAGAAGAGGCCAATGAAGGGGAAAACAACTGCTTGTTACACCCTATGAGCCA -GCATGGGATGGAAGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTGGC -ATTGCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAGGACTGCTGACATCGAGC -TTTCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGG -GGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTC -TCTGGTTAGACCAGATCAGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTA -AGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGT ->Ref.28_BF.BR.99.BREPM12817.DQ085874 -ACTAGAGATCCCTCAGACACTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAA -CAGGGACCGAAAGCGAAAGTAGAACCAGAGGAGATCTCTCGACGCAGGACTCGGCTTGCT -GAAGTGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAATTTTTGACTAG -CGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAG -ATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATG -TAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACAT -CAGAAGGCTGTAGACAAATACTGGAACAGCTACAACCATCCCTTAAGACAGGATCAGAAG -AACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGG -TAAAAGATACCAAAGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAA -AGGCACAGCAAGCAGCAGCTGGCACAGGAAACAGCAACCAGGCCAGCCAAAATTACCCTA -TAGTGCAGAACCTTCAGGGGCAAATGGTACATCAACCCCTATCACCTAGAACTTTAAATG -CATGGGTAAAGGTGGTAGAAGAGAAGGGTTTTAACCCAGAAGTAATACCCATGTTTTCAG -CATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGAC -ATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATGAGGAAGCTGCAGAATGGGACA -GATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAA -GTGATATAGCTGGAACAACTAGTAACCTTCAGGAACAAATACAATGGATGACAAGCAACC -CACCTGTCCCAGTGGGAGAAATCTATAAAAGATGGATCATCCTAGGATTAACTACACTAG -TAGGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGACCTTTTTA -GAGACTATGTAGACCGGTTCTTTAAAACCCTAAGACTTGAGCAATGTACACAGGAAGTAA -AAGGTTGGATGACAGACACCTTGTTGGTTCCAAATGCGAACCCCGATTGTAAGACCTTTT -TAAAAGCTTTGGGCCCAGGGCCTTCACTTGAAGAACTGACGACCCCTGGTCCGGGAGTGG -GAGGACCTAGCCATAAGGCAAGAGTTTTGGCTGAGGCAATGAGCCAAACAACAAGTGCAG -CTGTAATGATGCAGAAAAGTAACTTTAAGGGCCAAAGAAGAATTGTTAAATGTTTTAATT -GTGGCAAAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGA -AATGTGGAAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAG -GGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGC -CATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGA -AGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCA -ACGACCAATAGTCACAGTAAAGATAGAGGGACAGCCAAAGGAAGCTCTATTAGATACAGG -AGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGAT -AGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATAACATATCCATAGACATTTG -TGGACACAAGGCTACAGGTACAGTATTAGTAGGGCCTACACCTGTCAACATAATTGGAAG -AAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGT -ACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGA -AGAAAAAATAAAAGCATTAATAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTC -AAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAG -TACTAAATGGAGAAAATTAGTAGATTTCAGAGAGCTTAATAAGAAAACTCAAGACTTCTG -GGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGT -ACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGAATTCAGGAAGTATAC -TGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGT -GCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGCAGCATGACAAAAATCTT -AGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTA -TGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACA -TCTGTTAAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCT -TTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGA -AAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTAGGAAAATTGAATTGGGCAAG -TCAGATTTATCCAGGGATTAAAGTAAGGCAATTATGTAAGCTCCTTAGGGGAACCAAAGC -ACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGA -GATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGA -AATACAGAAGCAGGGGCAATGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA -TCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACAAATGATGTAAAACAACT -AACAGAAGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGATTCCTAA -ATTTAAATTGCCCATACAGAAAGAGACATGGGAAACATGGTGGATAGAGTATTGGCAAGC -CACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCA -GTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAG -GGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTTCC -CCTGACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATCCATTTAGCTTTGCAGGA -TTCGGGAGTAGAGGTAAACATAGTAACAGACTCCCAATATGCATTAGGAATCATCCAAGC -ACAACCAGATAAAAGTGAATCAGAGTTAGTTAGTCAAATAATAGAGCAGTTAATAAAAAA -GGAAAAAGTCTATTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGT -AGATAAATTAGTCAGTGCTGGAATCAGAAAGATACTATTTTTAGATGGAATAGATAAGGC -CCAAGAGGAACATGAGAAATATCACAGTAATTGGAGATCAATGGCTAGTGATTTTAACCT -GCCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGA -AGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTT -AGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGT -TATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATG -GCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCATCAGTAACACGGTTAAGGC -CGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCA -AGGAGTAATAGAATCTATGAATAAGGAATTAAAGAAAATTATAGGACAGGTAAGAGATCA -GGCTGAACATCTTAAAACAGCAGTGCAAATGGCAGTATTCATTCACAATTTTAAAAGAAA -AGGGGGGATTGGGGGGTACAGTGCAGGAGAAAGAATAGTAGACATAATAGCAACAGACAT -ACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAG -GGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGC -AGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCAT -TAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGA -TTAGAACATGGAACAGTTTAGTAAAACACCATGTGTATGTTTCAAGGAAAGCTAGGAATT -GGGTTTATAAACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCC -CACTAGGGGATGCTAAATTAGTAGTAATAACTTATTGGGGTCTGCATACAGGAGAAAGAG -ACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAA -TAGACCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAAT -CTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGACCTAGGTGTGATTATCAAGCAG -GACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAA -AGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCC -AGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGA -ACTTAAGAATGAAGCTGTTAGACATTTTCCTAGACCATGGCTCCATGGCTTAGGACAACA -TATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCA -ACAACTGCTGTTCATTCATTACAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCG -ACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAACACCCAG -GGAGTCAGCCTAGGACTGCCTGTACCAAATGCTACTGTAAAAAGTGTTGCTTTCATTGCC -AAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGC -GACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACAGTAAGTAT -TACATGTAATGCACTCTTTACAAATATTAGCAATAGTAGCATTAGTAGTAGTAGCAATAA -TAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGGCAAAGAA -GAATAGACAGGATAATTGAGAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCG -AAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATG -ATCTGTAATGCTGCAGAACAATCGTGGGTCACAGTCTACTATGGGGTGCCTGTGTGGAAA -GAAGCATCCACCACCTTATTTTGTGCATCAGATGCTAGCGCAGTTGACACAGAGGTACAT -AATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAGGAAATAGTATTG -GAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGATCAAATGCATGAA -GATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGT -GTTACTTTAAATTGCACTAATTGGAATACTACTAATAAGTTGAATGCTACTGATACCAAT -AGTAGTAGAATAGAGGAAGAAATGAAGGAAGAAATGAAAAACTGCTCTTTCAATGTCACC -TCAAGCATAGGAAATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTA -CCAATAAATAATGATAGTACAAGCTATACACTGATAAATTGTAACACCTCAGTCATTACA -CAAGCCTGTCCAAAGGTATCCTTTGAACCAATTCCCATACATTATTGTACCCCAGCTGGT -TTTGCGCTTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGACCATGTACAAATGTC -AGTACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAAT -GGCAGTCTAGCAGAAGAAGAAATAGTGATTAGATCTGAAAATTTCTCGAACAATGCTAAA -ACCATAATAGTACAGCTGAATGAAACTGTAGAAATTACTTGTGAAAGACCCAACAACAAT -ACAAGAAAAGGTATACATCTAGGATTTGGGAGAGCATTGTATGCAACAGGAGAAATAATA -GGAGATATAAGACAAGCATATTGTAACCTTAATAGAACAAAATGGGAAAACACTTTAAAA -CGGATAGTTACAAAATTAGGAGAACAATTTAAAAATCAAAATAAAACAATAACCTTCGAT -CTCTCCTCAGGAGGGGACCCAGAAATTATGCTGCACAGTTTCAGTTGTGGAGGGGAATTC -TTCTACTGTAATACAACACAGCTGTATAATAGTAATAGGAAGCAGGAGAATAATGGCACT -TGGAATAATAATGGGAGTAATACTAATGATAATATCACACTCCCATGCAGAATAAAACAA -ATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCACAGGAGCA -ATTAGATGTTTATCAAATATTACAGGGCTGTTATTAACAAGAGATGGTGGAACGAGAGCG -AATCTGTCCGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAA -TTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCAAAG -AGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAACAATAGGAGCTATGTTCCTTGGG -TTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAAGCC -AGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATTGAGGCG -CAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTG -GCTGTAGAAAGATACCTAAAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAA -CTCATCTGCACCACTGCTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTAGATGAA -ATTTGGAACAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGGCGTA -ATATACAATTTAATTGAAGAAGCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTG -CAATTGGATACGTGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTAT -ATAAAGATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTA -CTCTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGACCCGCCTC -CCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGAC -AAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGG -AGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTT -GTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAG -TATTGGAGTCAGGAACTAAGGAATAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCA -GTAGCTGAGGGAACAGATAGGATTATAGAAATAGTACAAATAATTTGTAGAGCTATTCTC -CACATACCTAGACGAATAAGACAGGGCTTAGAAAGAGCTTTGCAATAAGATGGGTGGCAA -GTGGTCAAAATGTAGCATGGGTGGGTGGACTGCTGTAAGGGAAAGAATGAGACGAACTGA -GCCAGCAACTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAAACA -TGGAGCAATTACAAGTAGCAATACAGCAGCTACTAATGCTGACTGTGCCTGGCTAGAAGC -ACAAGAGGAGGAGGAAGTGGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGAC -TTACAAAGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCT -AATTTACTCCCAGAAAAGACAAGAGATCCTTGATCTGTGGGTCTACAACACACAAGGATA -CTTCCCTGATTGGCAGAACTACACAAAAGGGCCAGGGATCAGATATCCACTGACCTTCGG -ATGGTGCTTCAAGCTAGTGCCAGTTGATCCAGAACAGGTAGAAAAGGCCAATGAAGGAGA -GAACAACAGCCTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGT -GTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCGCCACGTAGCCAGAGAGCTGCATCC -GGAGTATTACAAAGACTGCTGACATCGAGTTTTCTACATGGGACTTTCCACTGGGGACTT -TCCAGGGAGG diff --git a/tests/expected-results-large-hxb2/defects.json b/tests/expected-results-large-hxb2/defects.json index bf78e47..9e26dfe 100644 --- a/tests/expected-results-large-hxb2/defects.json +++ b/tests/expected-results-large-hxb2/defects.json @@ -1,1639 +1 @@ -{ - "KX505501.1": [ - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1629-1746 contains out of frame indels that impact 1950 positions.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'pol' exceeds maximum deletion tolerance. Contains 2892 deletions with max tolerance of 93 deletions.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'pol' exceeds maximum distance tolerance. It is 2.13586 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.88345.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'T--'.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'env' at 1747-1746 contains out of frame indels that impact 1714 positions.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'env' exceeds maximum deletion tolerance. Contains 2346 deletions with max tolerance of 54 deletions.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 2.11186 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vif' at 1747-1746 contains out of frame indels that impact 386 positions.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vif' exceeds maximum deletion tolerance. Contains 354 deletions with max tolerance of 12 deletions.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 2.04883 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vpr' at 1747-1746 contains out of frame indels that impact 194 positions.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vpr' exceeds maximum deletion tolerance. Contains 66 deletions with max tolerance of 6 deletions.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 2.00365 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 1747-1746 contains out of frame indels that impact 144 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 69 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'rev_exon1' at 1747-1746 contains out of frame indels that impact 51 positions.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Insertion", - "message": "ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 69 insertions with max tolerance of 0 insertions.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 1747-1746 contains out of frame indels that impact 166 positions.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vpu' exceeds maximum deletion tolerance. Contains 24 deletions with max tolerance of 6 deletions.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 2.044 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 1747-1746 contains out of frame indels that impact 62 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 15 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 1747-1746 contains out of frame indels that impact 184 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 198 deletions with max tolerance of 7 deletions.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'nef' at 1747-1778 contains out of frame indels that impact 425 positions.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 396 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 2.0371 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'AGA'.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'GAC'.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "RevResponseElementDeletion", - "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions.", - "region": null - }, - { - "qseqid": "KX505501.1", - "error": "LongDeletion", - "message": "Query sequence contains a very large deletion.", - "region": null - }, - { - "qseqid": "KX505501.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - } - ], - "MN691959": [ - { - "qseqid": "MN691959", - "error": "Frameshift", - "message": "ORF 'vpu' at 5911-6156 contains out of frame indels that impact 122 positions.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "InternalStop", - "message": "ORF 'vpu' at 5911-6156 contains an internal stop codon at 5974.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.70488 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8238-8330 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN691959", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - } - ], - "MN692074": [ - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'pol' at 2085-4082 contains out of frame indels that impact 676 positions.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'pol' exceeds maximum deletion tolerance. Contains 981 deletions with max tolerance of 93 deletions.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'GAT'.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'env' at 4083-4082 contains out of frame indels that impact 1714 positions.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'env' exceeds maximum deletion tolerance. Contains 2073 deletions with max tolerance of 54 deletions.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 2.05571 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vif' at 4083-4082 contains out of frame indels that impact 386 positions.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'vif' exceeds maximum deletion tolerance. Contains 81 deletions with max tolerance of 12 deletions.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.99787 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vpr' at 4083-4082 contains out of frame indels that impact 194 positions.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'vpr' exceeds maximum insertion tolerance. Contains 207 insertions with max tolerance of 6 insertions.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 1.9951 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 4083-4082 contains out of frame indels that impact 144 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'tat_exon1' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 0 insertions.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'rev_exon1' at 4083-4082 contains out of frame indels that impact 51 positions.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 387 insertions with max tolerance of 0 insertions.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vpu' at 4083-4082 contains out of frame indels that impact 166 positions.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'vpu' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 24 insertions.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 2.00052 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 4083-4082 contains out of frame indels that impact 62 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 60 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 4083-4082 contains out of frame indels that impact 184 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 243 deletions with max tolerance of 7 deletions.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'nef' at 4083-4086 contains out of frame indels that impact 412 positions.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 123 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 2.00372 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ACC'.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'CAG'.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "RevResponseElementDeletion", - "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions.", - "region": null - }, - { - "qseqid": "MN692074", - "error": "LongDeletion", - "message": "Query sequence contains a very large deletion.", - "region": null - } - ], - "MN692145": [], - "MN090335": [ - { - "qseqid": "MN090335", - "error": "Frameshift", - "message": "ORF 'gag' at 482-1665 contains out of frame indels that impact 1230 positions.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "InternalStop", - "message": "ORF 'gag' at 482-1665 contains an internal stop codon at 683.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "MutatedStopCodon", - "message": "ORF 'gag' has a mutated stop codon: 'AAA'.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 7740-7832 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN090335", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN090335", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 42 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: AT. The context is TTAACTGCGAAT-----CGTTC.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MN090376": [ - { - "qseqid": "MN090376", - "error": "Frameshift", - "message": "ORF 'gag' at 541-1590 contains out of frame indels that impact 754 positions.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "InternalStop", - "message": "ORF 'gag' at 541-1590 contains an internal stop codon at 598.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.33506 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'TGC'.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 95 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MN090376", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ----------------------.", - "region": null - }, - { - "qseqid": "MN090376", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MN090376", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115581.1": [], - "MK115690.1": [ - { - "qseqid": "MK115690.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 12 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115690.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: G-. The context is GCGGCGACTGG-----------.", - "region": null - }, - { - "qseqid": "MK115690.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - } - ], - "MK115571.1": [ - { - "qseqid": "MK115571.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 79 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115571.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ----------------------.", - "region": null - } - ], - "MK115514.1": [ - { - "qseqid": "MK115514.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCGACT--------CGAAA.", - "region": null - } - ], - "MK115488.1": [ - { - "qseqid": "MK115488.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCGACT--------CGAAA.", - "region": null - } - ], - "MK115030.1": [ - { - "qseqid": "MK115030.1", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MK115030.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115498.1": [], - "MK115211.1": [ - { - "qseqid": "MK115211.1", - "error": "Frameshift", - "message": "ORF 'env' at 5688-8198 contains out of frame indels that impact 757 positions.", - "region": "env" - }, - { - "qseqid": "MK115211.1", - "error": "InternalStop", - "message": "ORF 'env' at 5688-8198 contains an internal stop codon at 6354.", - "region": "env" - }, - { - "qseqid": "MK115211.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115211.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCG----------------.", - "region": null - }, - { - "qseqid": "MK115211.1", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MK115211.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115158.1": [ - { - "qseqid": "MK115158.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115158.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCGAACG------------.", - "region": null - } - ], - "MK114705.1": [ - { - "qseqid": "MK114705.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8551-9118 contains out of frame indels that impact 122 positions.", - "region": "nef" - }, - { - "qseqid": "MK114705.1", - "error": "InternalStop", - "message": "ORF 'nef' at 8551-9118 contains an internal stop codon at 8983.", - "region": "nef" - }, - { - "qseqid": "MK114705.1", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'ACG'.", - "region": "nef" - } - ], - "MK114856.1": [ - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'gag' at 493-2022 contains out of frame indels that impact 828 positions.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'gag' at 493-2022 contains an internal stop codon at 538.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.871 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1788-4826 contains out of frame indels that impact 1998 positions.", - "region": "pol" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1788-4826 contains an internal stop codon at 1917.", - "region": "pol" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'env' at 5954-8521 contains out of frame indels that impact 1453 positions.", - "region": "env" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'env' at 5954-8521 contains an internal stop codon at 6056.", - "region": "env" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.89012 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'vif' at 4771-5349 contains out of frame indels that impact 328 positions.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'vif' at 4771-5349 contains an internal stop codon at 4882.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.92813 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'vif' has a mutated start codon: 'ATA'.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'vpr' at 5289-5579 contains out of frame indels that impact 143 positions.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'vpr' at 5289-5579 contains an internal stop codon at 5340.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 1.70619 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'vpr' has a mutated start codon: 'ATA'.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5560-5774 contains out of frame indels that impact 123 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK114856.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 186 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 5791-6039 contains out of frame indels that impact 122 positions.", - "region": "vpu" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 5791-6039 contains an internal stop codon at 5857.", - "region": "vpu" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.77195 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8103-8195 contains out of frame indels that impact 37 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 8104-8379 contains out of frame indels that impact 69 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 8104-8379 contains an internal stop codon at 8161.", - "region": "rev_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 0.84783 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8523-9176 contains out of frame indels that impact 403 positions.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'nef' at 8523-9176 contains an internal stop codon at 8724.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 1.95 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ATA'.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 3.639064030015132e-65).", - "region": null - } - ], - "MK115009.1": [ - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'gag' at 303-1821 contains out of frame indels that impact 844 positions.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'gag' at 303-1821 contains an internal stop codon at 348.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.8444 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStopCodon", - "message": "ORF 'gag' has a mutated stop codon: 'AAA'.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1614-4625 contains out of frame indels that impact 1728 positions.", - "region": "pol" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1614-4625 contains an internal stop codon at 1713.", - "region": "pol" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'env' at 5753-8353 contains out of frame indels that impact 1437 positions.", - "region": "env" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'env' at 5753-8353 contains an internal stop codon at 5849.", - "region": "env" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.87886 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'vif' at 4570-5148 contains out of frame indels that impact 166 positions.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'vif' at 4570-5148 contains an internal stop codon at 4630.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.99021 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStartCodon", - "message": "ORF 'vif' has a mutated start codon: 'ATA'.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'vpr' at 5088-5378 contains out of frame indels that impact 88 positions.", - "region": "vpr" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'vpr' at 5088-5378 contains an internal stop codon at 5247.", - "region": "vpr" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 1.28021 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5359-5573 contains out of frame indels that impact 35 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115009.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 54 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 5590-5835 contains an internal stop codon at 5815.", - "region": "vpu" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 7936-8211 contains out of frame indels that impact 70 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 7936-8211 contains an internal stop codon at 7993.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 0.78261 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8355-8996 contains out of frame indels that impact 395 positions.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'nef' at 8355-8996 contains an internal stop codon at 8796.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 1.70049 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ATA'.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 1.2040967664149076e-41).", - "region": null - }, - { - "qseqid": "MK115009.1", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MK115009.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115387.1": [ - { - "qseqid": "MK115387.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 7870-7962 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK115387.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - } - ], - "MK115491.1": [], - "MK116110.1": [ - { - "qseqid": "MK116110.1", - "error": "Frameshift", - "message": "ORF 'gag' at 490-1601 contains out of frame indels that impact 991 positions.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "InternalStop", - "message": "ORF 'gag' at 490-1601 contains an internal stop codon at 553.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.74 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'TGC'.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "MutatedStopCodon", - "message": "ORF 'gag' has a mutated stop codon: 'AAA'.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 93 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK116110.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ----------------------.", - "region": null - }, - { - "qseqid": "MK116110.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115527.1": [], - "MK114997.1": [ - { - "qseqid": "MK114997.1", - "error": "Frameshift", - "message": "ORF 'env' at 5651-8207 contains out of frame indels that impact 1167 positions.", - "region": "env" - }, - { - "qseqid": "MK114997.1", - "error": "InternalStop", - "message": "ORF 'env' at 5651-8207 contains an internal stop codon at 6695.", - "region": "env" - }, - { - "qseqid": "MK114997.1", - "error": "MutatedStopCodon", - "message": "ORF 'env' has a mutated stop codon: 'AGA'.", - "region": "env" - } - ], - "MK115518.1": [], - "MK115065.1": [ - { - "qseqid": "MK115065.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 75 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115065.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ----------------------.", - "region": null - }, - { - "qseqid": "MK115065.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - }, - { - "qseqid": "MK115065.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115464.1": [ - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'gag' at 795-2297 contains out of frame indels that impact 637 positions.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'gag' at 795-2297 contains an internal stop codon at 1257.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.495 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'pol' at 2090-5101 contains out of frame indels that impact 1716 positions.", - "region": "pol" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'pol' at 2090-5101 contains an internal stop codon at 2189.", - "region": "pol" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'env' at 6229-8799 contains out of frame indels that impact 1449 positions.", - "region": "env" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'env' at 6229-8799 contains an internal stop codon at 6430.", - "region": "env" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.83161 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'vif' at 5046-5624 contains out of frame indels that impact 248 positions.", - "region": "vif" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'vif' at 5046-5624 contains an internal stop codon at 5253.", - "region": "vif" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.49427 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5835-6049 contains out of frame indels that impact 129 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115464.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 195 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115464.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 6066-6311 contains out of frame indels that impact 124 positions.", - "region": "vpu" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 6066-6311 contains an internal stop codon at 6132.", - "region": "vpu" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.88171 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8381-8473 contains out of frame indels that impact 37 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 8382-8657 contains out of frame indels that impact 45 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 8382-8657 contains an internal stop codon at 8439.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8801-9451 contains out of frame indels that impact 178 positions.", - "region": "nef" - }, - { - "qseqid": "MK115464.1", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 234 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "MK115464.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ATA'.", - "region": "nef" - }, - { - "qseqid": "MK115464.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 4.937891251407691e-23).", - "region": null - } - ], - "MK115530.1": [], - "MK115520.1": [ - { - "qseqid": "MK115520.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1988-5004 contains out of frame indels that impact 1225 positions.", - "region": "pol" - }, - { - "qseqid": "MK115520.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1988-5004 contains an internal stop codon at 3188.", - "region": "pol" - }, - { - "qseqid": "MK115520.1", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'AGA'.", - "region": "pol" - }, - { - "qseqid": "MK115520.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: GA. The context is CTGGTAACTAGAGATCGAAAGT.", - "region": null - }, - { - "qseqid": "MK115520.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115503.1": [], - "MK115570.1": [ - { - "qseqid": "MK115570.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 11 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115570.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCG-----------CGAAA.", - "region": null - } - ], - "MK115509.1": [], - "MK115702.1": [ - { - "qseqid": "MK115702.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 54 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115702.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ---------------------A.", - "region": null - }, - { - "qseqid": "MK115702.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - }, - { - "qseqid": "MK115702.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115095.1": [ - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'gag' at 189-1697 contains out of frame indels that impact 806 positions.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'gag' at 189-1697 contains an internal stop codon at 234.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.8384 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1490-4501 contains out of frame indels that impact 1865 positions.", - "region": "pol" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1490-4501 contains an internal stop codon at 1589.", - "region": "pol" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'env' at 5629-8229 contains out of frame indels that impact 1426 positions.", - "region": "env" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'env' at 5629-8229 contains an internal stop codon at 5725.", - "region": "env" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.87353 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'vif' at 4446-5024 contains out of frame indels that impact 332 positions.", - "region": "vif" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'vif' at 4446-5024 contains an internal stop codon at 4557.", - "region": "vif" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.85469 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5235-5449 contains out of frame indels that impact 35 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115095.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 54 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115095.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 5466-5711 contains out of frame indels that impact 122 positions.", - "region": "vpu" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 5466-5711 contains an internal stop codon at 5532.", - "region": "vpu" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.88171 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 7812-8087 contains out of frame indels that impact 69 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 7812-8087 contains an internal stop codon at 7869.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115095.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 8.33506116803153e-40).", - "region": null - } - ], - "MK115490.1": [], - "MK115576.1": [], - "OQ092466": [ - { - "qseqid": "OQ092466", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: GC. The context is GCGGCGACTGGCGAGTACGCCA.", - "region": null - } - ], - "OQ092463": [ - { - "qseqid": "OQ092463", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 26 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092463", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCC-.", - "region": null - } - ], - "OQ092465": [ - { - "qseqid": "OQ092465", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: GA. The context is GCGGCGACTGGAGAGTACGCC-.", - "region": null - } - ], - "OQ092462": [ - { - "qseqid": "OQ092462", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 23 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092462", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCCA.", - "region": null - } - ], - "OQ092464": [ - { - "qseqid": "OQ092464", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 21 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092464", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCG----------------.", - "region": null - } - ], - "OQ092467": [ - { - "qseqid": "OQ092467", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 22 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092467", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCCA.", - "region": null - } - ] -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-large-hxb2/holistic.json b/tests/expected-results-large-hxb2/holistic.json index 5737ccc..9e26dfe 100644 --- a/tests/expected-results-large-hxb2/holistic.json +++ b/tests/expected-results-large-hxb2/holistic.json @@ -1,576 +1 @@ -{ - "KX505501.1": { - "intact": false, - "qlen": 1997, - "hypermutation_probablility": 0.7087072014754221, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 1997, - "blast_sseq_coverage": 0.2498456472525211, - "blast_qseq_coverage": 1.2158237356034052, - "blast_sseq_orfs_coverage": 0.17666166916541728, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MN691959": { - "intact": false, - "qlen": 9493, - "hypermutation_probablility": 0.19667690182893238, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9493, - "blast_sseq_coverage": 1.0817040543321672, - "blast_qseq_coverage": 1.1086063415148004, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN692074": { - "intact": false, - "qlen": 4178, - "hypermutation_probablility": 0.36378645339477633, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 4178, - "blast_sseq_coverage": 0.5042189750977567, - "blast_qseq_coverage": 1.1728099569171853, - "blast_sseq_orfs_coverage": 0.411544227886057, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MN692145": { - "intact": true, - "qlen": 9689, - "hypermutation_probablility": 0.1672411051048176, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9689, - "blast_sseq_coverage": 1.130479522535501, - "blast_qseq_coverage": 1.1271545051088863, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN090335": { - "intact": false, - "qlen": 9069, - "hypermutation_probablility": 0.1771850809736527, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9069, - "blast_sseq_coverage": 0.9842560197571517, - "blast_qseq_coverage": 1.0603153600176425, - "blast_sseq_orfs_coverage": 1.000374812593703, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN090376": { - "intact": false, - "qlen": 8985, - "hypermutation_probablility": 0.026415767987601813, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 8985, - "blast_sseq_coverage": 0.9784935171846059, - "blast_qseq_coverage": 1.0604340567612687, - "blast_sseq_orfs_coverage": 0.9943778110944528, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115581.1": { - "intact": true, - "qlen": 9495, - "hypermutation_probablility": 0.6919440876652894, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9495, - "blast_sseq_coverage": 0.987034369211772, - "blast_qseq_coverage": 1.0046340179041602, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115690.1": { - "intact": false, - "qlen": 9689, - "hypermutation_probablility": 0.051230576250981485, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9689, - "blast_sseq_coverage": 1.004630582424367, - "blast_qseq_coverage": 1.0113530808132933, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115571.1": { - "intact": false, - "qlen": 9394, - "hypermutation_probablility": 0.8029570594372466, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9394, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0113902490951672, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115514.1": { - "intact": false, - "qlen": 9382, - "hypermutation_probablility": 0.6482462132632603, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9382, - "blast_sseq_coverage": 0.9864169582218564, - "blast_qseq_coverage": 1.0173736943082499, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115488.1": { - "intact": false, - "qlen": 9623, - "hypermutation_probablility": 0.6534999185838631, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9623, - "blast_sseq_coverage": 1.0255196542498457, - "blast_qseq_coverage": 1.0325262392185388, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 6, - "is_reverse_complement": false - }, - "MK115030.1": { - "intact": false, - "qlen": 9126, - "hypermutation_probablility": 0.032014462397289556, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9126, - "blast_sseq_coverage": 0.994031693764149, - "blast_qseq_coverage": 1.0655270655270654, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115498.1": { - "intact": true, - "qlen": 9461, - "hypermutation_probablility": 0.83547963060225, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9461, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0080329774865235, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115211.1": { - "intact": false, - "qlen": 9032, - "hypermutation_probablility": 0.11818291879607423, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9032, - "blast_sseq_coverage": 0.995060712080675, - "blast_qseq_coverage": 1.0598981399468557, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115158.1": { - "intact": false, - "qlen": 9143, - "hypermutation_probablility": 0.06758916889824729, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9143, - "blast_sseq_coverage": 0.9853879399053304, - "blast_qseq_coverage": 1.043749316416931, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK114705.1": { - "intact": false, - "qlen": 9411, - "hypermutation_probablility": 0.14584270737492833, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9411, - "blast_sseq_coverage": 1.098065445564931, - "blast_qseq_coverage": 1.122622463075125, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 6, - "is_reverse_complement": false - }, - "MK114856.1": { - "intact": false, - "qlen": 9477, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9477, - "blast_sseq_coverage": 1.0485696645400289, - "blast_qseq_coverage": 1.0812493405085997, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MK115009.1": { - "intact": false, - "qlen": 9207, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9207, - "blast_sseq_coverage": 0.9965013377238114, - "blast_qseq_coverage": 1.0590854784403172, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115387.1": { - "intact": false, - "qlen": 9136, - "hypermutation_probablility": 0.5436355526687852, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9136, - "blast_sseq_coverage": 0.9796254373327845, - "blast_qseq_coverage": 1.040936952714536, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115491.1": { - "intact": true, - "qlen": 9422, - "hypermutation_probablility": 0.8961809048805741, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9422, - "blast_sseq_coverage": 1.0037044659394938, - "blast_qseq_coverage": 1.0299299511780937, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK116110.1": { - "intact": false, - "qlen": 8967, - "hypermutation_probablility": 0.07900386365437118, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 8967, - "blast_sseq_coverage": 0.9846676270837621, - "blast_qseq_coverage": 1.0635664101706257, - "blast_sseq_orfs_coverage": 0.9986256871564217, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MK115527.1": { - "intact": true, - "qlen": 9481, - "hypermutation_probablility": 0.770862998910788, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9481, - "blast_sseq_coverage": 0.9867256637168141, - "blast_qseq_coverage": 1.0056956017297753, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK114997.1": { - "intact": false, - "qlen": 9055, - "hypermutation_probablility": 0.05560625344150194, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9055, - "blast_sseq_coverage": 0.9784935171846059, - "blast_qseq_coverage": 1.0516841524019878, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115518.1": { - "intact": true, - "qlen": 9537, - "hypermutation_probablility": 0.6408152618300496, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9537, - "blast_sseq_coverage": 0.9847705289154147, - "blast_qseq_coverage": 0.9996854356715948, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115065.1": { - "intact": false, - "qlen": 9214, - "hypermutation_probablility": 0.033954952452739495, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9214, - "blast_sseq_coverage": 1.0080263428689031, - "blast_qseq_coverage": 1.069459518124593, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 6, - "is_reverse_complement": false - }, - "MK115464.1": { - "intact": false, - "qlen": 9663, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9663, - "blast_sseq_coverage": 0.9838444124305412, - "blast_qseq_coverage": 0.9893407844354756, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115530.1": { - "intact": true, - "qlen": 9544, - "hypermutation_probablility": 0.5812621948015355, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9544, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 0.9992665549036044, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115520.1": { - "intact": false, - "qlen": 9589, - "hypermutation_probablility": 0.5225247969864292, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9589, - "blast_sseq_coverage": 0.9786993208479111, - "blast_qseq_coverage": 0.987902805297737, - "blast_sseq_orfs_coverage": 1.012118940529735, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115503.1": { - "intact": true, - "qlen": 9617, - "hypermutation_probablility": 0.42870398270204335, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9617, - "blast_sseq_coverage": 0.987034369211772, - "blast_qseq_coverage": 0.9953207861079338, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115570.1": { - "intact": false, - "qlen": 9485, - "hypermutation_probablility": 0.7406166892211931, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9485, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0057986294148655, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115509.1": { - "intact": true, - "qlen": 9353, - "hypermutation_probablility": 0.7883768413537747, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9353, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0197797498128942, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115702.1": { - "intact": false, - "qlen": 9098, - "hypermutation_probablility": 0.1454272422215308, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9098, - "blast_sseq_coverage": 0.9874459765383824, - "blast_qseq_coverage": 1.0596834469114091, - "blast_sseq_orfs_coverage": 1.0198650674662668, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MK115095.1": { - "intact": false, - "qlen": 9137, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9137, - "blast_sseq_coverage": 0.9907388351512657, - "blast_qseq_coverage": 1.060085367188355, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115490.1": { - "intact": true, - "qlen": 9347, - "hypermutation_probablility": 0.8875203448314265, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9347, - "blast_sseq_coverage": 0.9848734307470673, - "blast_qseq_coverage": 1.0204343639670483, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115576.1": { - "intact": true, - "qlen": 9266, - "hypermutation_probablility": 0.8197662757563093, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9266, - "blast_sseq_coverage": 0.9917678534677917, - "blast_qseq_coverage": 1.0342110943233327, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092466": { - "intact": false, - "qlen": 9686, - "hypermutation_probablility": 0.3858261890626, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9686, - "blast_sseq_coverage": 1.1015641078411196, - "blast_qseq_coverage": 1.1192442700805285, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092463": { - "intact": false, - "qlen": 9605, - "hypermutation_probablility": 0.4036857753053361, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9605, - "blast_sseq_coverage": 1.0979625437332785, - "blast_qseq_coverage": 1.1118167621030712, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092465": { - "intact": false, - "qlen": 9659, - "hypermutation_probablility": 0.5473020413873195, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9659, - "blast_sseq_coverage": 1.0981683473965838, - "blast_qseq_coverage": 1.108292783932084, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092462": { - "intact": false, - "qlen": 9714, - "hypermutation_probablility": 0.10883643311676816, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9714, - "blast_sseq_coverage": 1.1306853261988064, - "blast_qseq_coverage": 1.1301214741610048, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092464": { - "intact": false, - "qlen": 9556, - "hypermutation_probablility": 0.47853703424568406, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9556, - "blast_sseq_coverage": 1.0979625437332785, - "blast_qseq_coverage": 1.115634156550858, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092467": { - "intact": false, - "qlen": 9936, - "hypermutation_probablility": 0.6438715160567257, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9936, - "blast_sseq_coverage": 1.1308911298621116, - "blast_qseq_coverage": 1.0962157809983897, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - } -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-large-hxb2/regions.json b/tests/expected-results-large-hxb2/regions.json index 109a68c..9e26dfe 100644 --- a/tests/expected-results-large-hxb2/regions.json +++ b/tests/expected-results-large-hxb2/regions.json @@ -1,6398 +1 @@ -{ - "KX505501.1": [ - { - "region": "gag", - "start": 336, - "end": 1745, - "orientation": "forward", - "distance": 0.41298449612403343, - "indel_impact": 69, - "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1628, - "end": 1745, - "orientation": "forward", - "distance": 2.1358565737051802, - "indel_impact": 1950, - "protein": "FCRENLAFPQGKAGEFPSEQTRANSPTSRELQVWGRDTN", - "nucleotides": "TTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "env", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.1118604651162807, - "indel_impact": 1714, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "vif", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.0488262910798123, - "indel_impact": 386, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.0036496350364965, - "indel_impact": 194, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.218055555555555, - "indel_impact": 144, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT", - "nucleotides": "", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.2499999999999996, - "indel_impact": 51, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT", - "nucleotides": "", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.044, - "indel_impact": 166, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "tat_exon2", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.2419354838709675, - "indel_impact": 62, - "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.214130434782609, - "indel_impact": 184, - "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 1746, - "end": 1777, - "orientation": "forward", - "distance": 2.03710407239819, - "indel_impact": 425, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "AGATGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN691959": [ - { - "region": "gag", - "start": 639, - "end": 2141, - "orientation": "forward", - "distance": 0.05400000000000005, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1934, - "end": 4945, - "orientation": "forward", - "distance": 0.03585657370517925, - "indel_impact": 0, - "protein": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4890, - "end": 5468, - "orientation": "forward", - "distance": 0.0625, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5408, - "end": 5698, - "orientation": "forward", - "distance": 0.0625, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5679, - "end": 5893, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5818, - "end": 5893, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5910, - "end": 6155, - "orientation": "forward", - "distance": 1.704878048780488, - "indel_impact": 122, - "protein": "MQPIQIAIVALVVAIIIAIVV", - "nucleotides": "ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6070, - "end": 8655, - "orientation": "forward", - "distance": 0.10139372822299642, - "indel_impact": 0, - "protein": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL", - "nucleotides": "ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8237, - "end": 8329, - "orientation": "forward", - "distance": 0.19354838709677424, - "indel_impact": 32, - "protein": "RPTSQTRGDPTGPKE", - "nucleotides": "AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8238, - "end": 8513, - "orientation": "forward", - "distance": 0.16304347826086962, - "indel_impact": 0, - "protein": "DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE", - "nucleotides": "GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8657, - "end": 9277, - "orientation": "forward", - "distance": 0.043689320388349495, - "indel_impact": 0, - "protein": "MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN692074": [ - { - "region": "gag", - "start": 789, - "end": 2291, - "orientation": "forward", - "distance": 0.14990059642147102, - "indel_impact": 0, - "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2084, - "end": 4081, - "orientation": "forward", - "distance": 0.8050695825049854, - "indel_impact": 676, - "protein": "FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGAT", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "env", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.055707762557078, - "indel_impact": 1714, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "vif", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 1.997872340425532, - "indel_impact": 386, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 1.9950980392156863, - "indel_impact": 194, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.218055555555555, - "indel_impact": 144, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD", - "nucleotides": "", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.2499999999999996, - "indel_impact": 51, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD", - "nucleotides": "", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.0005208333333333, - "indel_impact": 166, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "tat_exon2", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.2419354838709675, - "indel_impact": 62, - "protein": "TQWRALRCCI", - "nucleotides": "", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.214130434782609, - "indel_impact": 184, - "protein": "TQWRALRCCI", - "nucleotides": "", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 4082, - "end": 4085, - "orientation": "forward", - "distance": 2.0037162162162163, - "indel_impact": 412, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "ACCC", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN692145": [ - { - "region": "gag", - "start": 775, - "end": 2280, - "orientation": "forward", - "distance": 0.16267465069860276, - "indel_impact": 0, - "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2070, - "end": 5084, - "orientation": "forward", - "distance": 0.10796019900497522, - "indel_impact": 0, - "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5029, - "end": 5607, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5547, - "end": 5837, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5818, - "end": 6032, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKX", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5957, - "end": 6032, - "orientation": "forward", - "distance": 0.34615384615384626, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6049, - "end": 6297, - "orientation": "forward", - "distance": 0.6733333333333333, - "indel_impact": 0, - "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", - "nucleotides": "ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6212, - "end": 8782, - "orientation": "forward", - "distance": 0.5647651006711409, - "indel_impact": 0, - "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8364, - "end": 8456, - "orientation": "forward", - "distance": 0.3870967741935485, - "indel_impact": 0, - "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8365, - "end": 8640, - "orientation": "forward", - "distance": 0.26086956521739135, - "indel_impact": 0, - "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", - "nucleotides": "GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8784, - "end": 9386, - "orientation": "forward", - "distance": 0.40765550239234427, - "indel_impact": 0, - "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN090335": [ - { - "region": "gag", - "start": 481, - "end": 1664, - "orientation": "forward", - "distance": 0.9171874999999998, - "indel_impact": 1230, - "protein": "MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ", - "nucleotides": "ATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1427, - "end": 4468, - "orientation": "forward", - "distance": 0.23952802359881997, - "indel_impact": 0, - "protein": "FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4413, - "end": 4991, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4931, - "end": 5221, - "orientation": "forward", - "distance": 0.38144329896907214, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5202, - "end": 5416, - "orientation": "forward", - "distance": 0.41666666666666674, - "indel_impact": 0, - "protein": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKX", - "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5341, - "end": 5416, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5433, - "end": 5681, - "orientation": "forward", - "distance": 0.6837209302325582, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL", - "nucleotides": "ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5596, - "end": 8157, - "orientation": "forward", - "distance": 0.6589887640449441, - "indel_impact": 31, - "protein": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ", - "nucleotides": "ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7739, - "end": 7831, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 32, - "protein": "RPSSQPRGDQTGPKE", - "nucleotides": "AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7740, - "end": 8015, - "orientation": "forward", - "distance": 0.4565217391304348, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE", - "nucleotides": "GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8159, - "end": 8812, - "orientation": "forward", - "distance": 0.5935483870967742, - "indel_impact": 0, - "protein": "MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN090376": [ - { - "region": "gag", - "start": 540, - "end": 1589, - "orientation": "forward", - "distance": 1.3350597609561752, - "indel_impact": 754, - "protein": "MYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", - "nucleotides": "TGCTACTGTATTAAATAATGATTTAAGTTCCTCTGATCCTGTCTGAAGTGCTGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCCAGTTCCCTGCTTGCCCATACTATATGTTTTAACTTATATCTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATTTGTCTAATTCTCCCCCGCTTAATACCGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAAGGCGTACTCACCGTTCGCCGCCCCTCGCCTCTTGCTGTGCGCGCTTCAGCAAGCCGAGTCCGATAATTCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1382, - "end": 4393, - "orientation": "forward", - "distance": 0.1952380952380952, - "indel_impact": 0, - "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTGACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGATGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCTGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTCGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4338, - "end": 4919, - "orientation": "forward", - "distance": 0.4578680203045684, - "indel_impact": 0, - "protein": "MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAGCATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGACTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAAAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4859, - "end": 5149, - "orientation": "forward", - "distance": 0.35051546391752586, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", - "nucleotides": "ATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCTTGGCTTCATGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5130, - "end": 5344, - "orientation": "forward", - "distance": 0.5416666666666667, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5269, - "end": 5344, - "orientation": "forward", - "distance": 0.7517241379310344, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLRIAGTIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5361, - "end": 5630, - "orientation": "forward", - "distance": 0.8152173913043479, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL", - "nucleotides": "ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5524, - "end": 8109, - "orientation": "forward", - "distance": 0.6166294642857151, - "indel_impact": 0, - "protein": "MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAGGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAGCCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTAACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTAACATCAATAGCACTAATATAAACAATACCAATAGTATAGAAAGAGAAATGACAAACTGCTCTTTTAATGTCACCACAGTCATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAAACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGCTATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTAAAAATGTTAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAACACAGAAGTAAATATTATCACACTCCCATGCAAGATAAGGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACATTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7691, - "end": 7783, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", - "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7692, - "end": 7967, - "orientation": "forward", - "distance": 0.4565217391304348, - "indel_impact": 0, - "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", - "nucleotides": "GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8111, - "end": 8734, - "orientation": "forward", - "distance": 0.6291866028708133, - "indel_impact": 0, - "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", - "nucleotides": "ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACATCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115581.1": [ - { - "region": "gag", - "start": 680, - "end": 2179, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1972, - "end": 4983, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4928, - "end": 5506, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5446, - "end": 5736, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5717, - "end": 5931, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5856, - "end": 5931, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5948, - "end": 6193, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6111, - "end": 8651, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8233, - "end": 8325, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8234, - "end": 8509, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8653, - "end": 9267, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115690.1": [ - { - "region": "gag", - "start": 777, - "end": 2285, - "orientation": "forward", - "distance": 0.19661354581673307, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAGTGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAGATTAAAGCATATCGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATAATGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTGTATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGGCGCAGGAAACAGCAGTCAGACCAGCACCAGCCAAAATTACCCTATAGTACAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTCTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACTAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCCACCAGCATTCTAGACATAAGACAAGGACCAAAGGAGCCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTAGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAGGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGTCATAAAGCAAGAGTTTTAGCGGAAGCAATGAGCCAAGCAACAAATTCAGCTGCCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAAAGTGTTAAGTGTTTTAATTGCGGCAAAGATGGGCACATAGCAAAAAATTGCAGGGCCCCTAGAAGAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTCCAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2078, - "end": 5089, - "orientation": "forward", - "distance": 0.14129353233830844, - "indel_impact": 0, - "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGCACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATAGGGCCTGAGAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGACTTCAGAGAACTAAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGACAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAACCTTATAGAAAACAAAATCCAAACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACCTAGAAATAGGGCAGCATAGAATAAAAATAGAAGAACTGAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGACAAGTGGACAGTACAGCCTATAGCGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATCTACCCAGGAATTAAAGTAAGGCAATTATGTAAACTACTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAATTAGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGAGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTGACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAATCAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTATCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACATGCAATTTATCTAGCTTTGCAAGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTGTTTTTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAGCAGCATACTTTATTTTAAAATTAGCAGGACGATGGCCAGTAGCAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACAGTTAAGGCCGCCTGCTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAACAATGAATTGAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACGTAATAGCAACAGACTTACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAACATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5034, - "end": 5612, - "orientation": "forward", - "distance": 0.4093264248704662, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAAGATGGCACAGTTTAGTAAAACACCATATATATATTTCAGGGAAAGCTAGAGGATGGGTTTATAAACATCACTATGAAAACACTCATCCAAGAATAAGTTCAAAAGTATACATCCCACTAGGGGAAGCTAGACTGGCAGTAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGGAATATAGCACACAAGTAGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGTCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTTTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTATTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5552, - "end": 5842, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACACATCTATGAGACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAAGTCTGCAACAACTGCTGTTCATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGAATTACTCCACAGAGGAGAACAAGAAATGGAGCCAGTAGATCCTAA", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5823, - "end": 6037, - "orientation": "forward", - "distance": 0.41666666666666674, - "indel_impact": 0, - "protein": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5962, - "end": 6037, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDNDEDLLKTVRFIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6054, - "end": 6299, - "orientation": "forward", - "distance": 0.5848837209302327, - "indel_impact": 0, - "protein": "MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL", - "nucleotides": "ATGCAATCTTTAGCAATATTAGCAATAGTAGCATTAGTAGTAGCAGCCATAATAGCAATAGTTGTGTGGACCATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGATAGTGGCAATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6217, - "end": 8799, - "orientation": "forward", - "distance": 0.6138702460850114, - "indel_impact": 31, - "protein": "MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ", - "nucleotides": "ATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCTACAGACCCCAACCCACAAGAAATAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTACATTGCACTAAGTTGGAGATTAATAGCACTAAGAAGACTAATAGCACTAATAATGGTACTAACATCAATGCCACTGATGATAGTTGGGGGGAAATGAAAAACTGCTCTTTCAATACCACTGCAAGCATAAGAGATAAGGTACAGAGAGAATTTGCGCTTTTTTATAAACTTGATATAGTACCAATAGATAATGATGATATCAACTATAGGTTAATAAGTTGTAACACCTCAGTCCTTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAAAATTTCAATGGAACAGGACAATGTAAAAATGTCAGCACAGTGCAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTCAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAAATCTGAAAATATCACAGACAATACTAAAACTATAATAGTACAGCTGAATGCATCTGTAGCAATTGTTTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGGCCAGGGAGAGCGTTTTATGCAGCAGGAGATATAATAGGAGACATAAGACGAGCACACTGTATCCTTAACAAAACAACATGGGATAACACAATAGAACAGGTAGCTAAAAAATTAAGAGAACAATTTGAGAATAAGACAATAGTCTTTAGTGAATCCTCGGGAGGGGACCCAGAAATTACAATGATTAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAGTACAACTGTTTAATAGTACTTGGCATAATAATGGGAGTAGTACTACAGGGTCAAGTAGCAGTGAAGGCAATATCACACTCCCATGCAAAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACCAATTAGCTGCGAGTCAAATATTACAGGGTTGCTACTAACAAGAGATGGTGGGAATGACGCTAACGGGAACAACACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGCGAAGTGAATTATATAGATACAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCACAGAGAAGAGTGGTACAGAGAGAAAAAAGAGCAGTGGGTCTCGGAGCCTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGCTTTGGGGTTGCTCTGGAAAACTCATCTGCAACACTGCTGTGCCTTGGAATACTAGTTGGAGTAACAAATCTCTGGATGATATTTGGCATAACATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAACATAATATACAGCTTAATTGAGGCATCGCAAACCCAGCAAGAAAAGAATGAACAAGAATTACTAGAATTAGACAAATGGGCAAGTCTGTGGAATTGGTTTAGCATATCAAACTGGCTGTGGTACATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTATACTTTCTATAGTGAATAGAGTTAGGAAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAGTGCTGTTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGAGTTATAGAAGGATTGCGCAGAGCTTTTAGAGCTATTATCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGAGCTTTGCAATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8381, - "end": 8473, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKEPETKVESKTETDPLT", - "nucleotides": "AGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8382, - "end": 8657, - "orientation": "forward", - "distance": 0.4623655913978495, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE", - "nucleotides": "GACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8801, - "end": 9434, - "orientation": "forward", - "distance": 0.5274038461538462, - "indel_impact": 0, - "protein": "MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN", - "nucleotides": "ATGGGTGGTAAATGGTCAAAATGTAGTATAGTTGGATGGCCTACTGTAAGGGAAAGAATAAGACGAGCAGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAAGCATGGAGCAATCACAAGTAGCAATGCTAACAATGCTGATTGTACCTGGCTGGAAGCCCAAAAAGAAGAGGAGGAGGTAGGCTTTCCAGTCAGGCCTCAGGTACCCTTAAGACCAATGACTTACAAGGCAGCCTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATAATTTACTCCCAAAAAAGACAAGATATTCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACTAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGAGAGGGTAGAAGAGGAGAATAAAAGAGAGAACCGCTGCTTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGCTACAGTGGAGGTTTGACAGCCGCCTAGCCTTTCACCACGTAGCCAGAGAGCTGCATCCGGAGTACTATAAGAACTAGAACTGCTGACATCTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115571.1": [ - { - "region": "gag", - "start": 579, - "end": 2078, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1871, - "end": 4882, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4827, - "end": 5405, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5345, - "end": 5635, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5616, - "end": 5830, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5755, - "end": 5830, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5847, - "end": 6092, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6010, - "end": 8550, - "orientation": "forward", - "distance": 0.5251716247139588, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8132, - "end": 8224, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8133, - "end": 8408, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8552, - "end": 9166, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115514.1": [ - { - "region": "gag", - "start": 584, - "end": 2083, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1876, - "end": 4887, - "orientation": "forward", - "distance": 0.14811133200795235, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4832, - "end": 5410, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5350, - "end": 5640, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5621, - "end": 5835, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5760, - "end": 5835, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5852, - "end": 6097, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6015, - "end": 8555, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8137, - "end": 8229, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8138, - "end": 8413, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8557, - "end": 9171, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115488.1": [ - { - "region": "gag", - "start": 707, - "end": 2206, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1999, - "end": 5010, - "orientation": "forward", - "distance": 0.14811133200795235, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4955, - "end": 5533, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5473, - "end": 5763, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5744, - "end": 5958, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5883, - "end": 5958, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5975, - "end": 6220, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6138, - "end": 8678, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8260, - "end": 8352, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8261, - "end": 8536, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8680, - "end": 9294, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115030.1": [ - { - "region": "gag", - "start": 176, - "end": 1684, - "orientation": "forward", - "distance": 0.2328685258964145, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1477, - "end": 4488, - "orientation": "forward", - "distance": 0.22266401590457252, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4433, - "end": 5011, - "orientation": "forward", - "distance": 0.3969072164948453, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4951, - "end": 5241, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGACATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5222, - "end": 5436, - "orientation": "forward", - "distance": 0.5945945945945945, - "indel_impact": 0, - "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5361, - "end": 5436, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5453, - "end": 5698, - "orientation": "forward", - "distance": 0.8160919540229887, - "indel_impact": 0, - "protein": "MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM", - "nucleotides": "ATGCATATCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5616, - "end": 8216, - "orientation": "forward", - "distance": 0.6431111111111119, - "indel_impact": 0, - "protein": "MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAGAAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAATACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGAAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAACCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7798, - "end": 7890, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7799, - "end": 8074, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8218, - "end": 8859, - "orientation": "forward", - "distance": 0.7375565610859729, - "indel_impact": 0, - "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", - "nucleotides": "ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATACTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115498.1": [ - { - "region": "gag", - "start": 663, - "end": 2162, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1955, - "end": 4966, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4911, - "end": 5489, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5429, - "end": 5719, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5700, - "end": 5914, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5839, - "end": 5914, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5931, - "end": 6176, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6094, - "end": 8634, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8216, - "end": 8308, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8217, - "end": 8492, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8636, - "end": 9250, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115211.1": [ - { - "region": "gag", - "start": 250, - "end": 1752, - "orientation": "forward", - "distance": 0.20813492063492078, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACACAGGACATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCATTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAGCCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAAACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAAATGCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1545, - "end": 4556, - "orientation": "forward", - "distance": 0.2109018830525271, - "indel_impact": 0, - "protein": "FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAGAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCTGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATTTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTACCACCTGTAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGGCAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGTAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4501, - "end": 5082, - "orientation": "forward", - "distance": 0.4215384615384614, - "indel_impact": 0, - "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5022, - "end": 5312, - "orientation": "forward", - "distance": 0.38144329896907214, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", - "nucleotides": "ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAACTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGGGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5293, - "end": 5507, - "orientation": "forward", - "distance": 0.547945205479452, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5432, - "end": 5507, - "orientation": "forward", - "distance": 0.7037037037037037, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLRITRTIKFLYQNX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5524, - "end": 5793, - "orientation": "forward", - "distance": 0.847826086956522, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL", - "nucleotides": "ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTATTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5687, - "end": 8197, - "orientation": "forward", - "distance": 1.2241972477064227, - "indel_impact": 757, - "protein": "MHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACCCAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTACTATAAACAATACCAGTAGTATAGAAGAAGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCGCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTAGGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACACTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGTAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7779, - "end": 7871, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", - "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7780, - "end": 8055, - "orientation": "forward", - "distance": 0.4565217391304348, - "indel_impact": 0, - "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", - "nucleotides": "GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8199, - "end": 8822, - "orientation": "forward", - "distance": 0.6435406698564594, - "indel_impact": 0, - "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC", - "nucleotides": "ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACCCAGATAAAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115158.1": [ - { - "region": "gag", - "start": 316, - "end": 1818, - "orientation": "forward", - "distance": 0.1990039840637452, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACCCAGGAAATAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACGATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAACCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1611, - "end": 4622, - "orientation": "forward", - "distance": 0.20792864222001972, - "indel_impact": 0, - "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTCTACCAGGAAGATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATGGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTGCATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCTCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACTTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAGGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4567, - "end": 5148, - "orientation": "forward", - "distance": 0.4523076923076923, - "indel_impact": 0, - "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGTTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACGGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5088, - "end": 5378, - "orientation": "forward", - "distance": 0.35051546391752586, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", - "nucleotides": "ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTGGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5359, - "end": 5573, - "orientation": "forward", - "distance": 0.547945205479452, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5498, - "end": 5573, - "orientation": "forward", - "distance": 0.7965517241379307, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKITRTIKFLYQNX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5590, - "end": 5859, - "orientation": "forward", - "distance": 0.8152173913043479, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL", - "nucleotides": "ATGCAATCCTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTTGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5753, - "end": 8314, - "orientation": "forward", - "distance": 0.6048152295632705, - "indel_impact": 35, - "protein": "MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTGAAAATGTTAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGGGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTATATCTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7896, - "end": 7988, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", - "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7897, - "end": 8172, - "orientation": "forward", - "distance": 0.48913043478260865, - "indel_impact": 0, - "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE", - "nucleotides": "GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8316, - "end": 8939, - "orientation": "forward", - "distance": 0.6291866028708133, - "indel_impact": 0, - "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", - "nucleotides": "ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK114705.1": [ - { - "region": "gag", - "start": 532, - "end": 2046, - "orientation": "forward", - "distance": 0.23247524752475246, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGCGAATTAGATAGATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCGGTTAATCCTGGCCTGTTAGAAACATCAGAGGGCTGTAGGCAAATACTGGGACAGCTACAACCGTCCCTTCAAACAGGATCAGAAGAACTTAAATCATTATTTAATACAATAGCAGTCCTTTATTGCGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTCTAAATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGTCAGAGCAGTCAAGTCAGCCAAAATTACCCTATAGTGCAGAACCATCAGGGGCAAATGGTATATCAGGCTCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCCGAGGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAGGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGGACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCATGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTATCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCTTTTAGAGATTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAAGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTCTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCAGCCACAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGCAACAGGTGGTGCAACTAACATAATGATGCAGAAAGGCAATTTTAGGAACCAAGGAAAACCTATTAAGTGTTTCAATTGTGGCAAAGAAGGGCACCTAGCTAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCTCTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1839, - "end": 4850, - "orientation": "forward", - "distance": 0.17412935323383083, - "indel_impact": 0, - "protein": "FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTAAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAACCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCCGTATTTGCCATAAAGAAAAAGGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAGCTTAATAAAAGAACTCAAGACTTTTGGGAGGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTCTCAGTTCCTTTAGATGAAAGCTTCAGAAAGTATACTGCATTTACCATACCTAGTACTAACAATGAGACACCCGGGATTAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAAAGTAGCATGACGAAAATCTTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTACCAATACATGGATGATTTATACGTAGGATCTGACTTAGAAATAGAGCAGCATAGAGCAAAAGTAGAGAACCTGAGAGAGCATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGCTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAGCTAAGGCACTAACAGAAGTGATAACACTAACAGAAGAAGCAGAGCTAGAATTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAGTAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACCTATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAAGCAGTTCAAAAAATAGCCACAGAGAGCATAGTAGTATGGGGAAAGATTCCTAAATTTAGATTACCCATACAGAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAGTACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGGCAAAAAGTTATCCCCTTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGACAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAGAAGGTCTACCTGACATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCAGGAATCAGGAAAATACTATTTTTGGATGGAATAGATAAGGCCCAGGAAGATCATGAGAAATATCATAGTAATTGGAAAGCAATGGCTAGTGATTTTAACATACCACCTGTGGTAGCAAAAGAGATAATAGCCAGCTGTGATAAATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGGTACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAATAATACATACAGATAATGGTAGCAATTTCACCAGCACTACAGTCAAGGCCGCCTGCTGGTGGGCAGGTGTTAAGCAGGAGTTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGAGTACTGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATCAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4795, - "end": 5373, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTCTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGGTTTACAGACACCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAACACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATATGTACTATTTTGATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTGCAATACCTGGCACTAGCAGTATTAGTAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTCGCGAGACTGACAGAGGATAGATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5313, - "end": 5597, - "orientation": "forward", - "distance": 0.40312499999999996, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS", - "nucleotides": "ATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAGCTTAAGAGGGAAGCTGTTAGACATTTTCCTAGGGAATGGCTCCATAGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGGCAGGAGTAGGAGCCATAATAAGAATACTGCAACAATTACTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGGATAGGCATACTGAGGAGAACAAGAAATGGAGCCCGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5578, - "end": 5792, - "orientation": "forward", - "distance": 0.655263157894737, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKX", - "nucleotides": "ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5717, - "end": 5792, - "orientation": "forward", - "distance": 0.8076923076923077, - "indel_impact": 0, - "protein": "MAGRSGDRDEDLLETVRFIKFLYQNX", - "nucleotides": "ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5809, - "end": 6054, - "orientation": "forward", - "distance": 0.8310344827586209, - "indel_impact": 0, - "protein": "MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL", - "nucleotides": "ATGCAACCTTTAGAGATATCAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTACTCTTAGAGTATAGGAAAATATTAAGGCAAAAGAAAATAGACAGATTAATTAATAGAATAAGTGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5972, - "end": 8548, - "orientation": "forward", - "distance": 0.6253647586980922, - "indel_impact": 0, - "protein": "MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL", - "nucleotides": "ATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAGTGCTACAAACATGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGATGCAACCACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGATACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCGACCCACAAGAAGTAGTACTGGAAAATGTGACAGAAAATTATAATATGGGAAAAAATAACATGGTGGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTACTCTTAACCCCATTCTGTGTCACTTTAAATTGCACTGATGCTAACATCACCAGCACTAATAATAGTAGAGATAAGAAGGAAGGAGAAAGTACATTGGAGGAGACGAAAGGAGAAATAAAAAACTGCTCTTTCAATATGACTTCAAGCATGAGCGATAAGTCTCAGAAACAACGTGCACTTTTTTATAAGCTTGATGTGGTACAAATAGATGAGACTAATAATAATAGTTATAGGTTGATAAGTTGTAACACCTCAGTCGTCACACAGGCTTGTCCAAAGGTATCCTTTGATCCAATCCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGAAATTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAACCTGTAGTGTCAACCCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAAGTAATGATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTGCAGCTGAAGACACCTGTACAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGGATAAGTATGGGACCAGGGAGAGTAATTTATGCAACAGGACAAATAATAGGAGATATAAGAAAAGCACATTGCAACATTAGTAGAGCAGAATGGAATACAACTTTAAAGCAGATAGTTACACAATTAAGAAAGCAGTGGAATAGAACCATAATCTTTAACTCATCCTCAGGAGGGGACCCAGAAATTGTGATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACAAAACTATTTAATAGTACTTGGCCACGTAATAGTACTTGGAATAATACTGAAGGGTCAAATGACACTGAAATAATCACACTCCCGTGCAGAATAAAACAAATTGTAAACAGGTGGCAGGAAGTAGGCAAAGCAATGTATGCCCCTCCCATCCAAGGACAAATTAGTTGTTCATCAAATATTACAGGGCTGCTACTAGTTAGAGATGGTGGAATTAACACCAGTGAGAGCAACGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAGGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATGCTGGGAGCTATGTTCCTTGGGCTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGTTGACGGTACAGACCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGACTCCTAGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAACACTAGTTGGAGTAATAGATCTTATGAAGATATTTGGAACAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAGGCTTAATATACACCTTAATTGAAAAATCGCAGAACCAGCAGGAAATAAATGAACAAGAACTATTGTCATTGGATAAGTGGGCAAGCCTGTGGAATTGGTTTAATATAACAAATTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAGTGCTATCAGCTTGCTCAACGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGAATCATAGGAGTAGTACAAAGAACTTGGAGAGCTTTTATCCACATACCTAGGAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8130, - "end": 8222, - "orientation": "forward", - "distance": 0.9303030303030304, - "indel_impact": 0, - "protein": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD", - "nucleotides": "AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8131, - "end": 8406, - "orientation": "forward", - "distance": 0.5591397849462365, - "indel_impact": 0, - "protein": "DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE", - "nucleotides": "GACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8550, - "end": 9117, - "orientation": "forward", - "distance": 0.9368932038834918, - "indel_impact": 122, - "protein": "MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK", - "nucleotides": "ATGGGTGGAAAATGGTCAAAAAAGAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAAAATGAAGCGAACTGAGCCAGCAGCAGAGGGGGTGGGAGCAGCATCTCGAGACCTGGACAAATATGGAGCAATCACAAGTAGCAATACAGCACAGACCAATCCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAAGAGGTAGGCTTTCCAGTCAGACCCCAGGTACCTTTGAGACCAATGACTTACAAGGCAGCTGTGGATATGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAGAGACAAGATATCCTTGATCTGTGGATCTATCACACACAAGGCTACTTCCCTGATTGGCAAAATTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGAGGGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCATTTTAGTCAGCGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGTAGAACCAGAGGAGATCTCTCGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK114856.1": [ - { - "region": "gag", - "start": 492, - "end": 2021, - "orientation": "forward", - "distance": 1.871000000000001, - "indel_impact": 828, - "protein": "MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC", - "nucleotides": "ATAGGTGCGAGAGCGTCAGTATTGAGCAGAGGAGAATTAGATAGATAGGAGAAAATTCAGTTAAGGCCAAGGAGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAAGGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGGAGGCTGTAAACAGATATTAGAACAGCTACAACCATCCCTTCAGACAAGATCAGAAGAACTTAGATCATTATATAATACAGTAGCCACCCTCTATTATGTACATCAAAAGATAGATGTAAAAGACACCAAAGAAGCGTTAGACAAAGTAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCGGCAGCTGACACAAGAAACAGAGGCCAGACCAGTCAAAATTTCCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAGGCCATATCACCTAGAACTTTAAATGCATAAGTAAAAGTAGTAGAAGAAAAAGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACTATGCTAAACACAGTAAGTAGACACCAAGCAGCTATGCAAATGTTAAAAAAGGTCATCAATGAAGAAGCTGCAGAATAAGATAGATTACATCCAGTGCATGCAAGGCCTATTGCACCAGGCCAGATAAGAGAACCAAAAAGAAGTGACATAGCAAGAACTACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAAGAAAGATTTATAAAAGATAAATAATTCTAGGACTAAATAAAATAGTAAAAATGTATAGCCCTACCAGCATTTTAGACATAAAGCAAAGGCCAAAAGAACCCTTTAGAGACTATGTAGACCAGTTCTATAAAACTTTAAGAGCCAAGCAAGCTACACAGAAAGTAAAAAATTAGATGACAAAAACCTTGTTAGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAAAAAGTAAGAAGACCCGGCCATAAAGCAAAAGTTTTAGCTGAAGCAATGAGCCAAGCAACAGGTGCAGCCAACATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAAAGCACATAGCCAAAAATTGCAAGGCCCCTAAGAAAAAAGGCTGTTAGAAATATAGAAAAGAAAGACACCAAATGAAAAATTGCACTAAGAGACAGGCTAATTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1787, - "end": 4825, - "orientation": "forward", - "distance": 0.5422287390029328, - "indel_impact": 1998, - "protein": "ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI", - "nucleotides": "TTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAAGAGAGCAACTAAAGAAAGCTTTATTAAATACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAAGAAAATAGAAACCAAAAATGATAAGAAGAATTAGAAGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCAGACACAAAGCTATAAGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTAGAAGAAATCTGTTGACTCAGCTTAGTTGCACTCTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAAGAATGGACGGCCCAAAAGTTAAACAATAGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATAGAAAAAGAAAAGAAAATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATAGAAAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGATTTCTAAGAAATTCAATTAAGTATACCACATCCTGCAAAGCTAAAAAAGAAAAAATCAGTCACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAAGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAAAGATTAGATATCAGTATAATGTGCTTCCACAAAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTAGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAAGGCAACATAGAACAAAAGTAAAGGAACTGAGGCAACATCTAATGAGGTAAAGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTAGATGAGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAAGAAAGTTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAATTGAATTAGGCAAGTCAGATTTATGCAAAGATTAAAGTGAAGCAATTATGTAAGCTCCTTAAAAGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAAAGATTCTAAAAGATCCAGTACATAGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAAAGAGAAGGTCAGTGGACATATCAAATTTATCAAAAGCCATTTAAAAATCTAAAAACAGAGAAATATGCAAGAACGAAAGGTGCCCATACTAATGATGTAAAGCAATTAACAGAAGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATAAAGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATAAGAAACATAGTGGACAGATTATTGGCAAGCCACCTAGATCCCTAAGTAAGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATAGTACCAGTTAGAAAAAGAACCCATAATAAGAGCAGAAACCTTCTATGTAGATAAGGCAGCTAATAAAGATAATAAATCAAGAAAAGCAAGATATGTTACTGACAGAAGAAGACAAAAAGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAAGATTCAGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAAAGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGAAAAAAGTCTACCTGGCATAAGTGCCAGCCCACAAAAGAATTAAAAGAAATGAACAGGTAAATAAACTAGTCAGTGCTAGAATCAAGAAAGTACTATTTTTAGATAAAATAGAAAAAGCCCAAGAAGACCATAAAAAATATCACAGTAATTAAAGAACAATGGCTAGTAATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAAAAGAAGCTATGCATAGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAAGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAAGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTAGTAGGCAAAAATCAAGCAAGAATTTAGTATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAAAATCTATAAATAATGAATTAAAGAAAATTATAAGACAAGTAAAAGATCAGGCTAAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGAAGGATACAGTGCAGAGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCAGGTTTATTACAAGGACAGCAGAGATCCACTTTAGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAAAGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAAGGATTATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4770, - "end": 5348, - "orientation": "forward", - "distance": 1.928125, - "indel_impact": 328, - "protein": "MIVWQVDRMKIRTWKSLVKYHMYISKKAKK", - "nucleotides": "ATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAGAAATAGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGGTGCTAGATTAGTAATAACAACATATTAAGGTCTGCATACAGGAGAAAAAGACTGGCATTTAGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5288, - "end": 5578, - "orientation": "forward", - "distance": 1.7061855670103092, - "indel_impact": 143, - "protein": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS", - "nucleotides": "ATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAAACCTAAGACAATATATCTATGAAACTTATAAAGATACTTGGACAAGAGTAGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5559, - "end": 5773, - "orientation": "forward", - "distance": 0.5, - "indel_impact": 123, - "protein": "MKLIKILGQE", - "nucleotides": "ATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5698, - "end": 5773, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5790, - "end": 6038, - "orientation": "forward", - "distance": 1.7719512195121947, - "indel_impact": 122, - "protein": "MQPLKILAIVALVVAAIIAIVV", - "nucleotides": "ATGCAACCTTTAAAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGACCATAGTAGGCATAAAATATAAGAAAATATTAAGACAAAGAAAAATAGATAGAATAATTAATAGAATAAGAAAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5953, - "end": 8520, - "orientation": "forward", - "distance": 1.890116279069764, - "indel_impact": 1453, - "protein": "MTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD", - "nucleotides": "ATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTAAGTCACAGTCTATTATGAGGTACCTGTGTAAAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTAGAAAAATAACATGGTAGACCAGATGCATGAGGATATAATCAATTTATGAGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAAAAATAATACTGTAGGAAATCAAACAAATTATCATCTCAATGAAACTAATACAATACAAAGAAAAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAATATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAAAGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTAGTTTTGCAATTCTAAAGTGTAAAGATGAGATGTTCAATAGAACAAGACCATGTAAGAATGTCAGCACAGTACAATGTACACATAGAATTAGACCAGTAGTGTCAACTCAACTGCTGTTAAATAGTAGCCTAGCAGAAAAAAAGATAGTACTTAGATCTGAAAATTTCACAGACAATACTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAAAGAGAGCAATTTATGCAACAAGACAGATAATAGAAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGAAGTGACACTTTAAGCAAAATAGTTGAAAAATTAAAGGAAAAATTTAGAAAAAATAAAACAATAATCTTTAAGCAATCATCAAGAGAGGACATAGAAATTGAAACGCACAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGAAGTGTTAATAGAACTAGCATAAACAGAACTAACAATAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATCAACAGGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCTATCAGTAAGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATAGTAGTACAACTAATAGTAAAGAAGAGACCTTCAGACCTAGAGAAAGAAATATGAAGGACAATTAGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAAAAGTAGCACCCACCAAGGCACAAAGAAAAGTAGTGCAGAGAGAAAAAAGAGCAATAAGAACGTTAGGAGCTATGTTCCTCAGGTTCTTAAGAACAGCAGGAAGCACTATAGGCGCAGCGTCACTGACGCTGACAGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTAAGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTAGAAAGATACCTACAAGATCAACAGCTCCTGAAGATTTGAAGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATACTAGTTAGAGCAATAAATCTTACAGTACCATCTAAGATAACATGACCTAGATGCAGTAGGACAGAGAAATTCAAAATTACACAAAGATAATATACAACTTACTTAAAGAATCGCAAATCCAACAGAAAAAGAATGAAAAAGAATTATTAGAACTAGATCAATGAGCAAATTTGTAGAATTAGTTTAGTATAACAAAATGGCTATAGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAAGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAAAGAACAGATAAAGTTATAGAAGTAAGACAAAAAATTAGCAGAGCTTTTCTCCACATACCTAGAAAGATAAGACAAGGCTTAGAAAAGGCTTTGCAATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8102, - "end": 8194, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 37, - "protein": "RPSSQPQEDQTGPKE", - "nucleotides": "AGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8103, - "end": 8378, - "orientation": "forward", - "distance": 0.8478260869565217, - "indel_impact": 69, - "protein": "ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE", - "nucleotides": "GACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8522, - "end": 9175, - "orientation": "forward", - "distance": 1.9499999999999997, - "indel_impact": 403, - "protein": "MSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", - "nucleotides": "ATAAGTGGCAAGTAGTCAAAAAGTTGTATGGCTAGATAGCCTGCTGTAAAAGAAAGAATAGAAAGAGTTAATCCAAGGCCTGCTGCAAAGAAAGAACAAGCTGAGCCAGCAGCAGCTAAGGTAAGAGCAGCATCTCGAGACTTAGAAAAATATAGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTAGCTAGAAGCACAAGAGGAAGAAGAAGTAGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTAAATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTAGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGAAATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATAGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115009.1": [ - { - "region": "gag", - "start": 302, - "end": 1820, - "orientation": "forward", - "distance": 1.8444000000000005, - "indel_impact": 844, - "protein": "MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC", - "nucleotides": "ATGGGTGCTAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGAAGGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATAGTACATCAACCATTATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAAGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATAGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAAGAACCAAGAGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAAGGCTAAACAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAAGAACCCTTTAAAGATTATGTAGACCGATTCTATAAAACTCTAAAGGCTGAGCAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAAGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATAGTGCAAGGAGGCAATTTTAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAAGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAAAAATGTAGAAAGGAAAGACACCAAATGAAAGATTGTACTAAGAGACCAGACTAAGACGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1613, - "end": 4624, - "orientation": "forward", - "distance": 0.4463220675944335, - "indel_impact": 1728, - "protein": "AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATGATAAAAAGAATTAGAAGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACGCCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGTCAGGAATAGATAGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAGGAAAAGAAAATTACAAAAATTAGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAAAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTAAGAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGAAGTTAAATATCAGTACAATGTGCTTCCACAGAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAGGTGAGGACTCACCACACCAGACAAGAAACATCAGAAAAAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTGATAAATAGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTGAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGAGGAGAGACCAATAGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGAAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAGAAAATGTGGTGGACAAAGTATTGGCAAGCCACCTAGATTCCTGAGTAAGAATTTGTCAATACCCCTCCCTTAGTAAAACTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAAATAAGGCAGCTAATAGAAAGACTAAATTAAGAAAAGCAGAGTATGTTACGGACAGAAGAAGACAAAAGGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAGATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTGAAGGAAATGAACAAGTAGATAAATTAGTCAGTAATAGAATCAGAAGAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAGAGAGCAATGGCTAGTAATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATAGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATAGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAAGCCGCCTGTTAGTAGGCAGAGATCAAGCAGGAATTTAGTATTCCCTACAATCCTCAAAGTCAAAGAGTAGTAAAATCTATGAATAATAAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCATAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGAGATTATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4569, - "end": 5147, - "orientation": "forward", - "distance": 1.990212765957447, - "indel_impact": 166, - "protein": "MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", - "nucleotides": "ATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATAGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATAGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGAGGAAGCAAGATTGGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5087, - "end": 5377, - "orientation": "forward", - "distance": 1.2802083333333347, - "indel_impact": 88, - "protein": "MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5358, - "end": 5572, - "orientation": "forward", - "distance": 0.6712328767123288, - "indel_impact": 35, - "protein": "MTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5497, - "end": 5572, - "orientation": "forward", - "distance": 0.7037037037037037, - "indel_impact": 0, - "protein": "MAGRSRDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5589, - "end": 5834, - "orientation": "forward", - "distance": 1.0363636363636366, - "indel_impact": 0, - "protein": "MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP", - "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGCAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5752, - "end": 8352, - "orientation": "forward", - "distance": 1.8788617886178816, - "indel_impact": 1437, - "protein": "MKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD", - "nucleotides": "ATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATAAAGTACCTGTATAGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTAGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTAGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAACTTGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATGAAAGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTAATGTAGTACCAATAGATGAAGATAGTAAAAATACTACGGGCAAATATAAGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTAAACGGCAGTCTAGCAGAAGAAAAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATAGAACAGACATAATAAGAGATATAAGACAAGCGCATTATAACATTAGTAAGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGAAGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAGTACTTAGAATAGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAAGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTAGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTAGAGGAAGAGATATGAAGGACAATTAGAGAAGTAAATTATATAAATATAAAGTAGTCAAAATTAAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAAGGAAAAAAGAGCAATAAGACTTGAAGCTTTCTTCCTTAGGTTCTTAAGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACAGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAAAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTAAGGATTTAAAGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAAGAAAAAGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTAGGCAAGTTTGTAAAATTAGTTTGACATTACACAGTAGCTATAGTATATAAAAATATTCATAATAATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGAGGACAGATAAAATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7934, - "end": 8026, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 0, - "protein": "RPSSQPREDPTGPKEQKKEVERKTEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7935, - "end": 8210, - "orientation": "forward", - "distance": 0.7826086956521738, - "indel_impact": 70, - "protein": "ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8354, - "end": 8995, - "orientation": "forward", - "distance": 1.7004854368932047, - "indel_impact": 395, - "protein": "MTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR", - "nucleotides": "ATAGGTAACAAGTTGTCAAGAAGGCTCAAGGCTAGATGGCCTGCCATAAAGGAGAAAATAAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAACAGCTAAGGTAAGAGCAGCATCTCGAGACCTGAAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGAAGAAAGAAGAGGTAAGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGAGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAAGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATAGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAAGTAGAAGAGGCCAGTGTAAGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATAGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115387.1": [ - { - "region": "gag", - "start": 292, - "end": 1794, - "orientation": "forward", - "distance": 0.21157684630738527, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAGGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACACAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCGCAGGATGTAAAAAATTGGATGACAGAAACCTTATTGGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1587, - "end": 4598, - "orientation": "forward", - "distance": 0.13147410358565748, - "indel_impact": 0, - "protein": "FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAGGGAAGATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAGAAATCAATAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTGGGGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAGACAATTATGTAAACTCCTTAAGGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGGGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATGGGATACCTGGTGGACAGAATATTGGCAAGCCACCTGGATTCCCGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAAAAAGAACCTATTGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAGGAATAATTCAAGCACAACCAGATAGGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAGGGTCTACCTTGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTCCTATTTTTGGATGGAATAGATAAGGCCCAAGAGGAGCATGAGAAATATCACAATAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAGGGAGAAGCCACGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTGGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4543, - "end": 5121, - "orientation": "forward", - "distance": 0.32164948453608244, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAGGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGCAGGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5061, - "end": 5351, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5332, - "end": 5546, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5471, - "end": 5546, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5563, - "end": 5808, - "orientation": "forward", - "distance": 0.7764705882352942, - "indel_impact": 0, - "protein": "MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL", - "nucleotides": "ATGCAATCCTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAGGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5726, - "end": 8287, - "orientation": "forward", - "distance": 0.6926339285714282, - "indel_impact": 0, - "protein": "MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAGCATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGACTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAGATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGCTGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGCATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAAAGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAAGAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7869, - "end": 7961, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 32, - "protein": "RPSSQLRGEPTGPKE", - "nucleotides": "AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7870, - "end": 8145, - "orientation": "forward", - "distance": 0.2934782608695652, - "indel_impact": 0, - "protein": "DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE", - "nucleotides": "GACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8289, - "end": 8939, - "orientation": "forward", - "distance": 0.534862385321101, - "indel_impact": 0, - "protein": "MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTAGGGTTGGATGGAATGCAGTGAGGGAAAGAATGAGACGAGCTCAGCCAACAGCAGATAGGGAACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAATATGGAGCACTTACAAGTAGGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGATGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATGGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGGTTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115491.1": [ - { - "region": "gag", - "start": 521, - "end": 2020, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1813, - "end": 4824, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4769, - "end": 5347, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5287, - "end": 5577, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5558, - "end": 5772, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5697, - "end": 5772, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5789, - "end": 6034, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5952, - "end": 8492, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8074, - "end": 8166, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8075, - "end": 8350, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8494, - "end": 9108, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK116110.1": [ - { - "region": "gag", - "start": 489, - "end": 1600, - "orientation": "forward", - "distance": 1.74, - "indel_impact": 991, - "protein": "MSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ", - "nucleotides": "TGCAATCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAATATTTGCATTGCTGCTTGATGTCCCCCCACTGTATTTAGCATGGTGTTTATATCTTGTGGGGTGGCTCCTTCTGCTAATGCTGAAAACATAGGTATTACTTCTGGGCTAAAAGCCTTTTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGACATGGCCTGATGTACCATTTGCCCCTGGAGGTTTTGCACTATAGGGTAGTTTTGGCTGACCTGGCTGTTATTTCCTGCGCCAGCTGCTGCTTGCTGTGCTTTCATCTTGCTTTTGTTTTGCTCTTCCTCTATCTTATCTAGCGCTCCCTTGGTGTCTTGTATCTCTATCCTTTGATGTATACAATAGAGGACCGCTACTGTATTATATAATGATTTAAGCTCTTCTGACCCTGTTTGGAGGGATGGCTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTAGATGTTTTAACCTATATTTTTTCTTTCCTCCTGGCCTTAACCGAATTTTTTCCCATTGGTCTAATTTTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGGTAAAAACTTTTTTGGCGTACTCACCAGTCGCCGAAGCAATGAGCCAAGTAAATTCAACTACCGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAAGAAGACTGTTAAGTGTTTCAACTGTGGTAAAATAGGGCATATAGCAAAAAATTGCAGGGCCCCCAGGAGAAAGGGCTGTTGGAAATGTGGACAGGAAGGACACCAGATGAAAGATTGTAGTGAGAGACAGGCTAATTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1393, - "end": 4404, - "orientation": "forward", - "distance": 0.1970208540218472, - "indel_impact": 0, - "protein": "FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED", - "nucleotides": "TTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAAAAGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGTGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACCCATAGAAATCTATGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGGTGCACTTTAAATTTTCCCATTAGTCCTATCGAAACTGTACCAGTAAGATTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAGATTTCAAAGATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTGGGAATACCGCATCCCGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCATTAGATAAAGACTTTAGGAAGTATACTGCATTTACCATACCCAGTGTAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAGCTAGCATGACAAAAATTTTAGAGCCTTTTAGGAAGCAAAATCCAGACATGGTTATTTATCAATACATGGATGATCTATATATAGGATCTGACTTGGAATTAGGACAGCATAGGACAAAAATAGAGGAACTGAGACAACATCTATTGAGGTGGGGGTTTACCACACCAGACAAGAAGCATCAGAAAGAACCTCCATTCCTCTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAACACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTAGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAGTTATGTAAACTCCTTAGAGGAACCAAATCACTAACAGAAGTAGTACCACTAACAAGAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAAAAGCAGGGACAAGGCCAGTGGACTTATCAGATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATAAAGGGTACCCACACTAATGATGTAAAACAATTAACACAGGCTGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACATTCTATGTCGATGGGGCAGCCAATAGGGATACTAAATTAGGAAAAGCAGGATATGTTACTGACAGGGGAAGACAAAAAATTGTCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTACCTAGCTCTGCAGGATTCAGGATCAGAAGTAAACATAGTATCAGACTCACAGTATGCAATAGGAATTCTTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGACATGGGTGCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTATTATTCTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCCATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGTTAAAAGGAGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTGGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTGGCCAGTGGATATATTGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATATTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAACACAATACATACAGACAATGGCAGCAACTTCACTAGCACTGCGGTTAAAGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGGGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGAGGTACAGTGCAGGGGAAAGAATAGTAGACATGATAGCATCAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGACTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAA", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4349, - "end": 4927, - "orientation": "forward", - "distance": 0.44041450777202074, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAATGCATGGAAAAGCTTAGTAAAGCACCATATGCATGTTTCAAGGAAAGTTGAGAGATGGGTTTATAAACATCACTATGAAAGTACTAATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAAAAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAAGAGCTATAATACACAAGTAGACCCTGAAGTAGCAGACCAACTAATCCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAAAGCCATAGTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCCCTACAGTACTTGGCATTAGCAGCATTAGTAAAATCAAAAAAGACAAAGCCACCTTTGCCTAGCGTTACGAAGCTGACGGAGGATAGATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4867, - "end": 5157, - "orientation": "forward", - "distance": 0.5051546391752577, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP", - "nucleotides": "ATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGGATATGGCTTCAGAGCTTAGGACAATACGTCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTTTGCAACAAATGCTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACGAGGGAGAACAAGAAATGGAGCCAGTAGACCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5138, - "end": 5352, - "orientation": "forward", - "distance": 0.6835443037974687, - "indel_impact": 0, - "protein": "MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKX", - "nucleotides": "ATGGAGCCAGTAGACCCTAGCCTAGCGCCCTGGAAGCACCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGCTATTGTAAAAAGTGCTGCTTACATTGCCAAGTTTGTTTCACAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5277, - "end": 5352, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5369, - "end": 5614, - "orientation": "forward", - "distance": 0.7790697674418607, - "indel_impact": 0, - "protein": "MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL", - "nucleotides": "ATGCAATCTTTGCAAATAGGAGCAATAGTAGCATTAGTAGTAGGAACAATAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAAGAAAATAGATAGAATAATAGATAGAATAGTAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5532, - "end": 8072, - "orientation": "forward", - "distance": 0.576443941109853, - "indel_impact": 0, - "protein": "MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAGTGCTGCAACAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACGCATGCCTGTGTACCCACGGACCCCAACCCACAAGAAGTATTATTGGGAAATGTGACAGAAGATTTTAATGCATGGAAAAATAACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTTTGTGTTATTTTGCATTGCACTGATGTCAACAATACTAGAAATGGGATGACAGGAGAACTAAAAAACTGCTCTTTCAATATCACCACAAAAATAACAAATAAGGTACAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTACCAATAAATAATAAGGATAATGATACTAGCTTTAATAATAATAGCTATAGGTTGATAAGTTGTAACACCTCAGTTATTACACAGGCTTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTACTGTACCCCGGCTGGTTATGCAATTCTAAGGTGTAACAATGAGACATTCAGTGGAAAAGGGCCATGTACAAATGTCAGCTCAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGCAGTCTAGCAAAACAGGAGGTAGTAATTAGATCTCAAAATTTCTCGGACAATGTTAAAACCATAATAGTACAGCTGAAGACCCCTGTAAAAATTAACTGTACAAGGCCCAATAACAATACAAGAAAAAGTATACATGCAGGACCAGGGAAAGTAATTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGCAACATTAGTGCAGCAGAGTGGAATGATACTTTAGGACAGATAGTTACAAAATTACAAGAACAATTTGGGAATAAAACAATAGTCTTCAATCAATCGTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTTTACTGTAATTCAACACAACTGTTTAATAGTACTTGGAATAATAATGGTACTAATACTTGGAATAGTACAGGTAATATCACACTCCCATGTAAAATAAGGCAAATTGTAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCTCCTCCCATCCGTGGACAAATTAAATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAACGAGAGTGAGAGCGAAACCTTCAGACCTGGCGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGACTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAACACTGGGAGCTGTGTTCCTTGGGTTCTTGGGAACAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGCAATAAATCTCTGAATGAAATTTGGGATAACATGACCTGGATGGAGTGGGAAAAAGAAATTAGTAATTACACACAATTAATATACACTTTAATTGAAGAATCGCAGAGCCAGCAAGAAAAGAATGAACAAGAATTATTGGCACTAGATAAGTGGGACAGCTTGTGGAGTTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAATAGGGTTAAGAATAGTTTTTACTGTACTTTCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTGTCATTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTAGTACAAAGAGCTTGTAGAGCTATTCTCCACATACCTGTAAGACTAAGACAAGGCTTAGAAAGAGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7654, - "end": 7746, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 0, - "protein": "RPASQPRGDPTGPKESKKTVERETETDPHA", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7655, - "end": 7930, - "orientation": "forward", - "distance": 0.48913043478260865, - "indel_impact": 0, - "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC", - "nucleotides": "GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8074, - "end": 8694, - "orientation": "forward", - "distance": 0.5900473933649286, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTGTAAGGGAAAGAATAAGAAGAGCTGGGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGACAAACATGGAGCAATCACAAGTAACAATACACCAGCTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTTAGGCCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATGATATACTCCCAGCAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGACCAGGGGTCAGGTTTCCACTGACCTTTGGATGGTGCTTCAAACTAGTACCACTTGAGACAGAGCAGGTAGAAGCGGCCACTGGAGGAGAGAACAACTGCTTGTTACACCCTTTGAACCAGCATGGGATGGATGACCCGGAGAGAGAAGTACTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAGAGCCAAAGAGCTGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115527.1": [ - { - "region": "gag", - "start": 683, - "end": 2182, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1975, - "end": 4986, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4931, - "end": 5509, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5449, - "end": 5739, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5720, - "end": 5934, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5859, - "end": 5934, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5951, - "end": 6196, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6114, - "end": 8654, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8236, - "end": 8328, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8237, - "end": 8512, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8656, - "end": 9270, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK114997.1": [ - { - "region": "gag", - "start": 210, - "end": 1718, - "orientation": "forward", - "distance": 0.24035785288270395, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACACAATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGCACTTTAAATGCATGGGTAAAAGTGATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGATCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCCTCCATAATGGCGCAAGGAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1511, - "end": 4522, - "orientation": "forward", - "distance": 0.22266401590457252, - "indel_impact": 0, - "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAGAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGACAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACCAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTCCATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTAACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCCGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4467, - "end": 5045, - "orientation": "forward", - "distance": 0.3969072164948453, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTAGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCCTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4985, - "end": 5275, - "orientation": "forward", - "distance": 0.28125, - "indel_impact": 0, - "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5256, - "end": 5470, - "orientation": "forward", - "distance": 0.547945205479452, - "indel_impact": 0, - "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5395, - "end": 5470, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5487, - "end": 5732, - "orientation": "forward", - "distance": 0.7790697674418607, - "indel_impact": 0, - "protein": "MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", - "nucleotides": "ATGCATGCCTTAAAAATAGCAGCAATAGTAGGATTAGTAGTAGCAACAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5650, - "end": 8206, - "orientation": "forward", - "distance": 1.2346375143843504, - "indel_impact": 1167, - "protein": "MHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGAAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAGTTTAAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAGACTTGATGTAGTATCAATAGATGAAGATAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAACCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGAGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAAAAAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAACACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCTCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACATTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAGTGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAGGCAGTGGGAAAAGGAAATTGACAATTACACAGACACAATATATAACTTAATTGAACTATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAGTTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTACTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7788, - "end": 7880, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7789, - "end": 8064, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8208, - "end": 8849, - "orientation": "forward", - "distance": 0.7375565610859729, - "indel_impact": 0, - "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC", - "nucleotides": "ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTTCAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTTCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGTCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAATATTTCAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115518.1": [ - { - "region": "gag", - "start": 739, - "end": 2238, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2031, - "end": 5042, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4987, - "end": 5565, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5505, - "end": 5795, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5776, - "end": 5990, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5915, - "end": 5990, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6007, - "end": 6252, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6170, - "end": 8710, - "orientation": "forward", - "distance": 0.5247139588100684, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8292, - "end": 8384, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8293, - "end": 8568, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8712, - "end": 9326, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115065.1": [ - { - "region": "gag", - "start": 221, - "end": 1729, - "orientation": "forward", - "distance": 0.2507968127490041, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCCAAGGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGCTGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1522, - "end": 4533, - "orientation": "forward", - "distance": 0.20775347912524844, - "indel_impact": 0, - "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGATTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTATCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAAATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCTATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAGAAAAGTACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4478, - "end": 5056, - "orientation": "forward", - "distance": 0.3969072164948453, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4996, - "end": 5286, - "orientation": "forward", - "distance": 0.3125, - "indel_impact": 0, - "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5267, - "end": 5481, - "orientation": "forward", - "distance": 0.5945945945945945, - "indel_impact": 0, - "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5406, - "end": 5481, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5498, - "end": 5743, - "orientation": "forward", - "distance": 0.7790697674418607, - "indel_impact": 0, - "protein": "MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", - "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5661, - "end": 8261, - "orientation": "forward", - "distance": 0.6331111111111118, - "indel_impact": 0, - "protein": "MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAGTACTTGGAATGGTACTGACAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATAGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACCTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7843, - "end": 7935, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7844, - "end": 8119, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8263, - "end": 8904, - "orientation": "forward", - "distance": 0.7104072398190044, - "indel_impact": 0, - "protein": "MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", - "nucleotides": "ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTACCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAGAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115464.1": [ - { - "region": "gag", - "start": 794, - "end": 2296, - "orientation": "forward", - "distance": 1.495, - "indel_impact": 637, - "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", - "nucleotides": "ATAAGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAAGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACAAAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATAGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATAAGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATAGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCACAGGATGTAAAAAATTAGATGACAGAAACCTTATTAGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTAAGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAAGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2089, - "end": 5100, - "orientation": "forward", - "distance": 0.3214711729622268, - "indel_impact": 1716, - "protein": "AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATAAGTTTGCCAGGAAGATAGAAACCAAAAATGATAGAAGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATAACTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTAGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAACCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAAAAAAGATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAGAAAATTAGTAGATTTCAAGGAACTTAATAAAAGAACTCAAGACTTCTAAGAAGTTCAATTAAGAATACCACACCCCGCAAGGTTAAAAAAGAAGAAATCAATAACAGTACTAGATGTAGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAAGAAGTATACTGCATTTACCATACCTAGTATAAATAATAAGACACCAGAGATTAGATATCAGTACAATGTGCTTCCACAGGGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATAGATGACTTGTATGTAAGATCTGACTTAGAAATAAGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTAGAGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATAAGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTAAGAAAATTGAATTGAGCAAGTCAGATTTATGCAGAGATTAAAGTGAGACAATTATGTAAACTCCTTAAAGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAAGAAATTCTAAAAGAACCAGTACATGAAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGAGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGAGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGAGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATAAGATACCTAGTGGACAGAATATTGGCAAGCCACCTAGATTCCCGAGTAAGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATAGTACCAATTAGAAAAAGAGCCTATTGTAGGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAAAGAGACTAAATTAAGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAAGAATAATTCAAGCACAACCAGATAAGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTTGCATAGGTACCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTGCTAGAATCAGGAAAGTCCTATTTTTAGATAGAATAGATAAGGCCCAAGAAGAGCATAAGAAATATCACAATAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAAAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAAAAAGAAGCCACGCATAGACAAGTAGACTGTAGTCCAAGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTAGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTAGTAGGCAAAGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAAGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5045, - "end": 5623, - "orientation": "forward", - "distance": 1.494270833333335, - "indel_impact": 248, - "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAAGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTAGGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGTCAAGGAGTCTCCATAGAATGGAAGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5563, - "end": 5853, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5834, - "end": 6048, - "orientation": "forward", - "distance": 0.45833333333333326, - "indel_impact": 129, - "protein": "MRILGQE", - "nucleotides": "ATAGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5973, - "end": 6048, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6065, - "end": 6310, - "orientation": "forward", - "distance": 1.8817073170731702, - "indel_impact": 124, - "protein": "MQSLYILTIVALVVAAILAIVV", - "nucleotides": "ATGCAATCTTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTAGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAAGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6228, - "end": 8798, - "orientation": "forward", - "distance": 1.8316091954022926, - "indel_impact": 1449, - "protein": "MTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST", - "nucleotides": "ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATAAGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGAAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTTCTAATACTACTTAGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATAAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAAAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAAGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATAGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAAGAAGCCATACGAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAAGACCAGGAAGAGCATTTTATACAACAGGAGATATAATAAGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAGTAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAAAGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAAAGAAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTAGACTAAAAATGGTACTGATAGTTGGCAGTCTAATGATACTCAGAATAGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCCATCAGTAGACAAATTAACTGTACATCAAATATTACAGGGCTAGTTTTAACAAGAGATAGGAGGAATGAAACTAAGACCTTTAGACCTGGAAGAGAAAATATGAAGGATAATTGGAGAAGTAAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAAGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAGAAAAGAGCAGTAAGACTAGGAGCTATGTTCCTTAAGTTCTTAGGAGCAGCCAGAAGCACTATAGGCGCAGCGTCGATAGCGCTGACGGAACAGGCCAGACGAGTCTTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTAAGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATCGTAGTTGAGGTAGGCATAACAAAAATTACAAAAGTCTAGATGACATTTAGGATAACATGACCTAGATAGAGTAGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAAAAAAGAATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8380, - "end": 8472, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 37, - "protein": "RPSSQPREEPTGPKE", - "nucleotides": "AGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8381, - "end": 8656, - "orientation": "forward", - "distance": 0.5217391304347827, - "indel_impact": 45, - "protein": "RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE", - "nucleotides": "GACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8800, - "end": 9450, - "orientation": "forward", - "distance": 1.1589371980676328, - "indel_impact": 178, - "protein": "MTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", - "nucleotides": "ATAGGTGGCAAGTGGTCAAAAAGTAGTAAGGTTAAATAGAATGCAGTGAAAGAAAGAATAAGACGAGCTCAGCCAACAGCAGATAAAGAACGAGCTGAGCCAGCAGCAGATAAGGTAAGAGCAGCATCTAGAGACCTAGAAAAATATGGAGCACTTACAAGTAAGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGATGAGGTAGGTTTTCCAGTCAGACCTCAGTTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAAGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATAGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGATTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115530.1": [ - { - "region": "gag", - "start": 746, - "end": 2245, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2038, - "end": 5049, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4994, - "end": 5572, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5512, - "end": 5802, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5783, - "end": 5997, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5922, - "end": 5997, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6014, - "end": 6259, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6177, - "end": 8717, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8299, - "end": 8391, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8300, - "end": 8575, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8719, - "end": 9333, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115520.1": [ - { - "region": "gag", - "start": 695, - "end": 2194, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1987, - "end": 5003, - "orientation": "forward", - "distance": 1.448607975921763, - "indel_impact": 1225, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDMGNGQYSL", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATATGGGAAATGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4948, - "end": 5526, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5466, - "end": 5756, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5737, - "end": 5951, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5876, - "end": 5951, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5968, - "end": 6213, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6131, - "end": 8671, - "orientation": "forward", - "distance": 0.5251716247139588, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8253, - "end": 8345, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8254, - "end": 8529, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8673, - "end": 9287, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115503.1": [ - { - "region": "gag", - "start": 817, - "end": 2316, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2109, - "end": 5120, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5065, - "end": 5643, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5583, - "end": 5873, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5854, - "end": 6068, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5993, - "end": 6068, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6085, - "end": 6330, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6248, - "end": 8788, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8370, - "end": 8462, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8371, - "end": 8646, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8790, - "end": 9404, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115570.1": [ - { - "region": "gag", - "start": 687, - "end": 2186, - "orientation": "forward", - "distance": 0.2616302186878725, - "indel_impact": 0, - "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCGACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1979, - "end": 4990, - "orientation": "forward", - "distance": 0.14811133200795235, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4935, - "end": 5513, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5453, - "end": 5743, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5724, - "end": 5938, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5863, - "end": 5938, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5955, - "end": 6200, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6118, - "end": 8658, - "orientation": "forward", - "distance": 0.5325714285714285, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCTCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAGGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8240, - "end": 8332, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8241, - "end": 8516, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8660, - "end": 9274, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115509.1": [ - { - "region": "gag", - "start": 555, - "end": 2054, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1847, - "end": 4858, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4803, - "end": 5381, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5321, - "end": 5611, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5592, - "end": 5806, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5731, - "end": 5806, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5823, - "end": 6068, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5986, - "end": 8526, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8108, - "end": 8200, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8109, - "end": 8384, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8528, - "end": 9142, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115702.1": [ - { - "region": "gag", - "start": 246, - "end": 1781, - "orientation": "forward", - "distance": 0.342940038684721, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGCGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAGGAAACGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGCTTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAAACAAATAATGGGACAACTCCAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAGAGGATAGATGTAAAGGATACCAAAGAAGCTTTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCCGCTGACACAGGAAACAACAGCCAAGTCAGCCAAAATTACCCCATAGTGCAGAACATGCAGGGACAAATGGTACATCAGGCCATATCACCCAGAACCCTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCATTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAGGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCAGGCAGGACCTGTTGCACCAGGCCAGATAAGGGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACACATAATCCACCCGTCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCTGAGCAAGCTTCACAGGAAGTAAAAGGTTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCAGCCATAAGGCAAGGGTTTTGGCAGAAGCAATGAGCCAAGCAACAGGTGCACATGCCATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1544, - "end": 4585, - "orientation": "forward", - "distance": 0.20128078817733996, - "indel_impact": 0, - "protein": "FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAAAGATAGGGGGGCAAATAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGAAATGGCGTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATAGCCATAGAAATTTGTGGACATAAAGCAATTGGTACAGTATTAGTAGGACCTACACCTGTCAATATAATTGGAAGAAATCTATTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAGTTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATAGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGATTAAAAAAGAAAAAATCAATAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATATTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGACGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGAGGACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAGGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAGATGGACAGTACAGCCTATAAAGCTGCCAGAGAAAGAAATCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAAACAATTATGTAAACTCCTTAGGGGAACCAAAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGAATTAATAGCAGAAATACAGAAGCAAGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCGAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAGAAAATATCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAATATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAGTACTCCTCCCCTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCATCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATATTACTGACAGAGGAAGACAAAAGGTTGTCACCCTAAATGACACAACCAATCAAAAGACAGAGTTACAAGCAATTCTTCTAGCATTGCAGGATTCAGGATTAGAAGCAAACATAGTGACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGATCTACCTGACATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAGCAAGTAGATAAATTAGTCAGTACTGGGATTAGGAAAGTATTATTTTTAGATGGAATAGATAGGGCCCAAGAAGAGCATGAGAGATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTCAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAATAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACCACAGTTAAGGCCGCCTGTTGGTGGGCGGGGGTCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTGGTAGAATCTATGAATAAAGAATTAAAGAAAATAATAGGACAGGTCAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCAAAAATTCAAAACTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCGGTAGTAATACAGGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4530, - "end": 5108, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAACTAAGGGATGGTTTTATAGACATCACTATGAGAGCACTCATCCAAAAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTGAGTTGGTAGTAACAACATATTGGGGTTTGCAGCCAGGGGAAAGGGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAGGGAGATATAGAACACACGTGGACCCTAACCTAGCAGACCAACTAATTCATCTGCATTACTTTGATTGTTTTTCAGAATCTGCTATAAGACATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAAGAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACTAAACTAACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5048, - "end": 5338, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAACTTAAGCAGGAAGCTGTTAGGCATTTTCCTAGGCCATGGCTTCATAGCTTAGGGCAATATATCTATGAAACTTATGGGGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATGCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATCCCACAGAGGAGAGCAAGAAATGGATCCAGTAGATCCTAA", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5319, - "end": 5533, - "orientation": "forward", - "distance": 0.6818181818181821, - "indel_impact": 0, - "protein": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKX", - "nucleotides": "ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5458, - "end": 5533, - "orientation": "forward", - "distance": 0.6923076923076923, - "indel_impact": 0, - "protein": "MAGRSGDGDEDLLKAVRLIKTLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5550, - "end": 5807, - "orientation": "forward", - "distance": 0.945263157894737, - "indel_impact": 0, - "protein": "MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND", - "nucleotides": "ATGTTATCTTTAGAAGTAATAGTAGCAATAACAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGACCATAGTACTTATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAAGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5716, - "end": 8273, - "orientation": "forward", - "distance": 0.6706208425720628, - "indel_impact": 39, - "protein": "MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE", - "nucleotides": "ATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAATGCTACAGAACAATTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACAACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTATTAATAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAAATGCAAGAGGACATAGTCAGCTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACCTTAAATTGCACTAATTTGACCATTGAGCCAAACAATGCTACTAAAGCCAATATTAGTGGGAGGTTAGAGGGGAAAGGAGAAATGACAAACTGCTCTTTCAATGTCACCACAAGCCTAAGAGATAAGAGGAAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTAGCAACAGGTGAAAATAATAACAGCTTTAGGTTGATAAGTTGTAATACCTCAGAGATTACACAGGCCTGTCCAAAGGTATCATTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAAAAGTTCAATGGAACAGGAAAATGTAACAATGTCAGCATAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAGTTAGATCTGCCAATTTCTCAGACAATACTAAGACCATAATAGTACAGCTGAACAAAACTGTAGTAATTAATTGTACAAGACCCAACAACAATACAAGGAGAAGTATACATATAGCACCAGGGAGAGCATTTTATGCAACAGGAGATATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAAAGAAGATTGGAATACCACTTTAAACCAGGTGGCTAAAAAATTACAAGAACAATTTGAGAATGCAACAATAGACTTTAAACCATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACGGAACTATTTTCTTGGAATGCTACAACAAAACTGTTTACTTGGAATGCTACAAATAGCAATAATGGAACCATCATACTCCCATGTAGAATAAAACAAATTATAAACATGTGGCAAGAGGTAGGAAAAGCAATGTATGCCCCTCCCATTCGTGGACAAATTAGATGTTCGTCAAATATTACAGGACTGCTATTAACAAGAGATGGTGGGACTAACGGGACAGGGAACAGGAATGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAGAAATTAAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGACCATAGGAGCTATGTTCCTTGGGTTCCTGGGGGCAGCAGGAAGCACTATGGGCGCAGCATCACTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCGATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATAAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGGTACCTAAGAGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATATTAGTTGGAGTAATAGAACTCTGAATAACATTTGGGACAATTTGACTTGGATGCAGTGGGATACAGAAATTAACAATTACACAAACAAAATATACCAATTACTTGAGGAAGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAATTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTACATAAAAATATTCATATTAATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTAAATGCCACAGCCATAGTAGTAGCTGAGGGGACAGATAGGATTATAGAATTAGCACAAAGAATTTGTAGAGCAGAATAAGACAGGGCTTGAAAAGGCTTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7874, - "end": 7966, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPASQSRGDPTGPKEPKKKVERETETDPTD", - "nucleotides": "AGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7875, - "end": 8150, - "orientation": "forward", - "distance": 0.326086956521739, - "indel_impact": 0, - "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE", - "nucleotides": "GACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8275, - "end": 8895, - "orientation": "forward", - "distance": 0.5454976303317536, - "indel_impact": 0, - "protein": "MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAACATAGTAAGAGTGAATGGGCTGATGTAAGGGAAAGAATGGCACAAACTGAGGCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGATCTGGAAAGACATGGAGCAATCACAAGTAGCAATACAGCAACTAACAATGCTGCTTGTGCTTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAAACCTCAGGTGCCTTTGAGACCAATGACCTACAAGGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATTCCCAAAAAAGACAAGACATCCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGAACCAGATTCCCACTGTGCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTGGAAGAAGCCAATAAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACACCGAGAGAGAGGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACGTAGCCAGAGAGAAACATCCGGAGTACTTCAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115095.1": [ - { - "region": "gag", - "start": 188, - "end": 1696, - "orientation": "forward", - "distance": 1.8384000000000005, - "indel_impact": 806, - "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC", - "nucleotides": "ATAGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGGGAGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAAAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAACCCATATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAAGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAAGCTGCAGAATAAGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAGGAACCAAGAAGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATAGATAATCCTGAGGTTAAATAAAATAGTAAAAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAGGAACCCTTTAAGGATTATGTAGACCGGTTCTATAAAACTCTAAAGGCTGAACAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAAGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATGGTGCAAGGAGGCAATTTAAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAGAAATGTGGAAAGAAAGGACACCAAATGAAAGATTGTACTAAGAGACAGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1489, - "end": 4500, - "orientation": "forward", - "distance": 0.45376984126984143, - "indel_impact": 1865, - "protein": "KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK", - "nucleotides": "TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAAGAAGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAAAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATAATAGAAGGAATTAGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAAAAATTTGTAGACATAAAGCTATAAGTACAGTATTAATAAGACCTACACCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAAGAAAGGAAAATTACAAAAATTAGGCCTAAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATAGAGAAAATTAGTAGATTTCCGAGAACTTAATAAAAGAACACAAGACTTTTAAAAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAAGAGTTAGATATCAGTACAATGTGCTTCCACAAAGATAGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAAGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAAGTAAAGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTAAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAAGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAGGAGATTCTAAAAGAACCAGTACATAGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGAGGAGACCAATGGACATATCAAATTTATCAGAAGCCATTTAAAAATCTGAAAACAAAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATAGAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAAGAAATGTAGTAGACAGAGTATTGGCAAGCCACCTAGATTCCTGAGTAGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAGAGAGACTAAATTAAGAAAAGCAAGGTATGTTACGGACAGAAGAAGACAAAAAGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAAATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTAGTAGAATCAGAAAAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAAGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTAGTAGGCAAGGATCAAGCAGAAATTTAGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAAGGGGGATTGGGGGGTACAGTGCAGAGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTAGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4445, - "end": 5023, - "orientation": "forward", - "distance": 1.8546875000000016, - "indel_impact": 332, - "protein": "MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGAGTTTACAGACATCACCATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAAGGGAAGCAAGATTAGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGGCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4963, - "end": 5253, - "orientation": "forward", - "distance": 0.5, - "indel_impact": 0, - "protein": "MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAA", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5234, - "end": 5448, - "orientation": "forward", - "distance": 0.6756756756756757, - "indel_impact": 35, - "protein": "MTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATAGAGCCAGTAGATCCTAACTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5373, - "end": 5448, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5465, - "end": 5710, - "orientation": "forward", - "distance": 1.8817073170731702, - "indel_impact": 122, - "protein": "MHALEIAAIVRLVVAAIIAIVV", - "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGTAAGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5628, - "end": 8228, - "orientation": "forward", - "distance": 1.8735260115606882, - "indel_impact": 1426, - "protein": "MTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD", - "nucleotides": "ATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATGAGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAAGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATGAATTAAATCTAAATTGCCCTAACAATAATACTTGTAGTAATAATACTAAATATAATATGACGGAAGAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAAGTAGTGGAAATACTACAGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAAGGAGGGCATTTTATAGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTAGGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAAAGGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAAGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAATACTTAGAATGGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATAGTAAGAGCAACAATAGTAGTAATGATACAAAGACCTTCAGGCCTAGAAGAGGAGATATGAAGGACAATTAGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAGAAAGAAAAAAGAGCAATAGGACTTAGAGCTTTCTTCCTTAAGTTCTTAGGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTGAAGATTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTAATGTGCCCTAAAATGTTAGTTAGAGCCCTAGATAGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAGGAAAAAGAAATTGACAATTATACAGACACAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTAGCATTAGATAAGTAGGCAAGTTTGTAGAATTAGTTTGACATTACACAGTGGCTATAGTATATAAAAATATTCATAATGATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGAGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAAAGGACAGATAAGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAAGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7810, - "end": 7902, - "orientation": "forward", - "distance": 0.6774193548387097, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERKTEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7811, - "end": 8086, - "orientation": "forward", - "distance": 0.6195652173913044, - "indel_impact": 69, - "protein": "ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8230, - "end": 8871, - "orientation": "forward", - "distance": 0.8680555555555556, - "indel_impact": 40, - "protein": "MRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", - "nucleotides": "ATAGGTAACAAGTTGTCAAGAAGGCTCAGGGCTAGATGGCCTGCCATAAAAGAAAGAATGAGACAAGCTAGGCCAGTAAGAAAGCCAGAGCCAGCAGCAACTAAGGTAAGAGCAGCATCTCGAGACCTAGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGGAAGAAAAAGAGGTAGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAAGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAAGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATAGAAGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115490.1": [ - { - "region": "gag", - "start": 549, - "end": 2048, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1841, - "end": 4852, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4797, - "end": 5375, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5315, - "end": 5605, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5586, - "end": 5800, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5725, - "end": 5800, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5817, - "end": 6062, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5980, - "end": 8520, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8102, - "end": 8194, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8103, - "end": 8378, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8522, - "end": 9136, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115576.1": [ - { - "region": "gag", - "start": 468, - "end": 1967, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1760, - "end": 4771, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4716, - "end": 5294, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5234, - "end": 5524, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5505, - "end": 5719, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5644, - "end": 5719, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5736, - "end": 5981, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5899, - "end": 8439, - "orientation": "forward", - "distance": 0.5247139588100684, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8021, - "end": 8113, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8022, - "end": 8297, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8441, - "end": 9055, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092466": [ - { - "region": "gag", - "start": 825, - "end": 2360, - "orientation": "forward", - "distance": 0.21325536062378148, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTAGCCTGTTAGAAACAGCAGAAGGCTGTAGACAAATATTGGGACAGTTACAACCGTCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACATTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCCTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGCAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGCCAGGTTAGACACACAGGAAACAGCAGCCAGGTCAGCCAAAATTACCCTATAGTACAGAACCTTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGACTACATCCAGTGCATGCAGGGCCCATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTACACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGGGGACCCGGACATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCACGTAACAAATTCAAGTGCCATAATGATGCAGAGGGGCAATTTTAGAAACCAAAGAAAGGCTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2147, - "end": 5164, - "orientation": "forward", - "distance": 0.14711729622266412, - "indel_impact": 0, - "protein": "FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAATCTTAGAGCCTTATAGAACACGAAATCCAGAAATGGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGATTTACTACCCCAGACAAAAAACATCAAAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAAACAAAGACAGCTGGACTGTCAATGACATACAGAAACTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCGCTAACAGAAGAAGCAGAGTTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGGGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGGAAGGACAATGGACATTTCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGCGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAAATTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATACGCATTGGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTGATAAAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGAAAGTATTATTTTTAGATGGAATAGAGAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGCTAAAAGGAGAAGCCATACATGGACAGGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATCCAAACCAAAGAACTACAAAAACAAATTATAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTTATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5109, - "end": 5687, - "orientation": "forward", - "distance": 0.203125, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAGGAAAGCTAAGGGATGGTTTTATAGACATCACTTTGAAAGCAATCATCCAAAAATAAGTTCAGAAGTACACATCCCACTGGAGGATGCTAGACTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGGGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTACTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACACATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGAGAAAGCCACCCTTGCCTAGTGTTAAGAAGCTAACAGAAGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5627, - "end": 5917, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAGCTTAGGGCAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATTCTGCAACAACTGTTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5898, - "end": 6112, - "orientation": "forward", - "distance": 0.41666666666666674, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 6037, - "end": 6112, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDDELLKTVRLIKVLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6129, - "end": 6374, - "orientation": "forward", - "distance": 0.6395348837209305, - "indel_impact": 0, - "protein": "MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL", - "nucleotides": "ATGCAACCTTTAACAATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATACTAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6292, - "end": 8874, - "orientation": "forward", - "distance": 0.5852808988764047, - "indel_impact": 0, - "protein": "MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAATGCTACAGAACAACAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGGTATTAAAAAATGTGACAGAAAATTTTAATATGTGGGAAAATAACATGGTAGAACAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAGTTAACTCCACTCTGTGTTACTCTAAATTGCACTAATACCACTAGGAGTAGTGGAAATACTACCAATGAAATGAAAAACTGCTCTTTCTATACCGAAACAGACATAAGAGATAAGAAGAGAAAGGAATATGCACTTTTTTATGAACTTGATATAGTACCCATAGATGAGGATAATAAGAATAAGAGTAATAATATTAGCTATTCTAGGTTAATAAGTTGCAACACCTCAGTTATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGGCCATGTACAAATGTCAGCACAGTGCAATGTACACATGGTATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGACGAGGTAGTAATTAAATCTAGCAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTAAATGAAACTGTAAAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGACATAAGACAAGCACATTGTAACATCTCTAGAGCAAACTGGACAAACACTTTAAAACAGATAGCTGAAAAATTAGGAAAACAATTTGAGGAAAATAAAACAATAGTCTTTAATCCCTCCTCAGGAGGGGACCCAGAGGTTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATTCAACACCACTGTTTAATAGTACTTGGAAGGAGACTAATGGGATTTGGACTCGTATTGGAGAGTCAAATGATAGTGCTACTATCACACTAAATGATAGTGATACTATCACACTCCAATGTAAAATAAGGCAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAAAGGACAAATTAGCTGTTTATCAAACATTACAGGGCTGCTATTAGTAAGAGATGGTGGCAATAACACGAACGGGACCGAGATCTTCAGACCTGTAGGAGGAGAAATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAGAAAAGAGCGACATTGGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTGGAAAAAATTTGGAATAATATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACCTTACTTGAAGAATCGCAGAACCAGCAAGAAAAAAATGAAAAAGAATTATTGGAATTAGATACATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAATAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAATATTACAAAGAGCTTGTAGAGCTATTCTCCATATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8456, - "end": 8548, - "orientation": "forward", - "distance": 0.6774193548387097, - "indel_impact": 0, - "protein": "RSTPQLRGDPTGPKESKEKVERETETDPVH", - "nucleotides": "AGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8457, - "end": 8732, - "orientation": "forward", - "distance": 0.3586956521739131, - "indel_impact": 0, - "protein": "DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE", - "nucleotides": "GATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8876, - "end": 9508, - "orientation": "forward", - "distance": 0.43380952380952387, - "indel_impact": 0, - "protein": "MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAATGGTCAAAAAGTAAGCTATTTGGATGGCCTGCTGTAAGGGAAAGAATGAGAAGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACACGGAGCAATCACAAGTAGCAATACACCAACTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCGGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCCAAAAAGACAAGAGATCCTTGATCTGTGGGTCTATCATACACAAGGTTTCTTCCCTGATTGGGATAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGTTAGTACCAGTGGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAATAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAGGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCGTCACGTGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092463": [ - { - "region": "gag", - "start": 801, - "end": 2312, - "orientation": "forward", - "distance": 0.19009900990099005, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAGCGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAACACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGACCAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTCAGGAACCAGAGAAAGAATGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2105, - "end": 5116, - "orientation": "forward", - "distance": 0.10974155069582503, - "indel_impact": 0, - "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAGCAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCTATAGTAGGAGCAGAAACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGACCATGAGAAATATCACAGTAATTGGAGGGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGGTATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCAGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5061, - "end": 5639, - "orientation": "forward", - "distance": 0.28125, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTATTCATCCAAGAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAGGAGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATCGGTATTACTTTGATTGTTTTTCAGAATCTGCCATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5579, - "end": 5869, - "orientation": "forward", - "distance": 0.31958762886597936, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTCCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAACAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAACCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5850, - "end": 6064, - "orientation": "forward", - "distance": 0.20833333333333326, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKX", - "nucleotides": "ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5989, - "end": 6064, - "orientation": "forward", - "distance": 0.11538461538461542, - "indel_impact": 0, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6081, - "end": 6329, - "orientation": "forward", - "distance": 0.5441860465116279, - "indel_impact": 0, - "protein": "MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", - "nucleotides": "ATGCAACCTTTAGAAATATCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6244, - "end": 8826, - "orientation": "forward", - "distance": 0.49671201814058974, - "indel_impact": 0, - "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL", - "nucleotides": "ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTATGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATACCACTAGTACCAAGAATACCACCCCTAGTACCACTGCTAGTAGCGGGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGAAGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAAGGATTCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTCTTGGAAGGATGAGTCAAATGGCACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAATAATGAGAGCAACACCACCGAGATTTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAGAGAAATTGACAATTACACAAGCTTGATATACACTTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGACAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCAACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8408, - "end": 8500, - "orientation": "forward", - "distance": 0.59375, - "indel_impact": 0, - "protein": "RPTPQPRGDPTGQKESEKKVERETETDPDH", - "nucleotides": "AGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8409, - "end": 8684, - "orientation": "forward", - "distance": 0.326086956521739, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", - "nucleotides": "GACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8828, - "end": 9460, - "orientation": "forward", - "distance": 0.333809523809524, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGTAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGATTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092465": [ - { - "region": "gag", - "start": 855, - "end": 2357, - "orientation": "forward", - "distance": 0.20237623762376278, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGGTGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAACTAGAACGGTTTGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAACAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCAACTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAATCTCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGACTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTGCCATAATGATGCAGAGAGGCAATTTCAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2150, - "end": 5161, - "orientation": "forward", - "distance": 0.11570576540755484, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTTTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAAGACAGTTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAGACTCCTTAAGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGACAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAGCTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTTTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACCACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAAGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGTGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATACCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATACACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5106, - "end": 5684, - "orientation": "forward", - "distance": 0.328125, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGATTTATAAGCATCACTATGACAGTATTAATCCAAAAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCAAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCGACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGGTCTCTACAGTACTTGGCACTAACAGCACTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5624, - "end": 5914, - "orientation": "forward", - "distance": 0.25773195876288657, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTACACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCCTTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5895, - "end": 6109, - "orientation": "forward", - "distance": 0.20833333333333326, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 6034, - "end": 6109, - "orientation": "forward", - "distance": 0.11538461538461542, - "indel_impact": 0, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6126, - "end": 6374, - "orientation": "forward", - "distance": 0.6183908045977013, - "indel_impact": 0, - "protein": "MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", - "nucleotides": "ATGCAACCTTTAGTAATATCAGCAATAGTAGCATTAGTAGTAGTAGCGATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGGGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6289, - "end": 8880, - "orientation": "forward", - "distance": 0.5078917700112742, - "indel_impact": 0, - "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGCGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTGTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACTGGTGCCAATAATACCACTAGTACCAATACTACCACCCCTAGTACCACTGTTAGTAGCGAGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGACGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAAACATAATAGTACATCTGAATAAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGAGCAGGTATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGGTGAAGGGTCAAACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTACTATTAACAAGAGATGGTGGTAACGAGAGCGAGACCACCGACACTGAGACCTTCAGACCTGTAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGATTAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAAATCTGACACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCAGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8462, - "end": 8554, - "orientation": "forward", - "distance": 0.5, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGQKESKEKVERETETDPDH", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8463, - "end": 8738, - "orientation": "forward", - "distance": 0.326086956521739, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8882, - "end": 9514, - "orientation": "forward", - "distance": 0.333809523809524, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTAAAATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAACCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092462": [ - { - "region": "gag", - "start": 767, - "end": 2269, - "orientation": "forward", - "distance": 0.2870775347912524, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAGCTAAAACATATAGTATGGGCAAGCAGGGAACTAGAGCGATTTGCAGTTAATCCCGGCCTGTTAGAAACATCGGAGGGCTGTAGACAAATACTAGGGCAACTACAGCCCGCTCTTCAGACAGGATCAGAAGAACTTAAATCACTATTTCATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTGAAAGACACCAAAGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAAGAAAAGTAAGAAAAAGGCACAGCAAGCAGCCGCTGACACAGGAAATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGACAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGATAAGGCTTTCAGTCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCAGGCAGGGCCTGTTGCGCCAGGCCAGCTACGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAGCAAATAGCATGGATGACACATAATCCACCTGTCCCAGTAGGAGAAATCTATAAAAGATGGATACTTCTGGGATTAAATAAAATAGTAAGAATGTACAGCCCCGTCAGCATTCTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCTGAGCAGGCTTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAGCAAATCCAGCTAGCATAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGCCTATTAAGTGTTTCAACTGTGGCAAAGAGGGGCATATTGCTAAAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2062, - "end": 5073, - "orientation": "forward", - "distance": 0.1494023904382471, - "indel_impact": 0, - "protein": "FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTTTTAGAAGAAATGAATTTGCCAGGAAAATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGCTGCACTTTAAATTTTCCCATTAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAGAAAGATGGTAATAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCTGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGACTTCAGGAAGTATACTGCATTTACAATACCTAGCACAAACAATGAGACACCAGGGATTAGATACCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTGGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTATATGTAGGGTCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAACTGAGACAACATCTATTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCCGATAAATGGACAGTACAGCCTATATTGCTGCCAGAAAAAGACAGCTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATCAAAGTAAGGCAGCTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTGGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAACTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATATAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGAGGTACCCACACTAATGATATAAAACAATTAACAGAGGCAGTGCAAAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAATTACCCATACAAAAGGAAACATGGGAAGCATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGTCAATACCCCTCCCTTAGTGAAACTATGGTACCAGTTGGAAAAAGAACCCATAGTGGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTTACTGACAAAGGAAGACAAAAAGTTGTCCCCCTAACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAACTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAGTTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAGTTGTGACAAATGTCAGTTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCTGGAATATGGCAGCTAGATTGTACACATCTAAAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTGAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAACAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCAGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCGTCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACCAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATCAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5018, - "end": 5596, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAGAGTTTAGTAAAACATCATATGCATATATCAAGGAAAGCTAAGAATTGGTTGTATAGACATCACTATGAAAGCATTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAGTAATAACAACATATTGGGGTCTGCTTACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAGGAGATATAGAACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCTTATTAGGACGTGTAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACCAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5536, - "end": 5826, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAACTTAAAAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTTCATGGATTGGGACAGCATATCTATGAAACATATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCGACATAGCAGAATAGGCATTAATCTACAGAGGAGAGCAAGGAATGGATCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5807, - "end": 6021, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKX", - "nucleotides": "ATGGATCCAGTAGATCCTAGCCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGTTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5946, - "end": 6021, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6038, - "end": 6283, - "orientation": "forward", - "distance": 0.7471264367816093, - "indel_impact": 0, - "protein": "MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL", - "nucleotides": "ATGCGACCTTTAGAAATAGCAGCAATAGTAGCACTAGTAGTAGCAGTACTAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAGTGGATAGAATAAGAGAAAGAGCAGAAGATAGTGGAAATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6201, - "end": 8777, - "orientation": "forward", - "distance": 0.6409745293466222, - "indel_impact": 33, - "protein": "MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAGTGCCACAGAGAACTTGTGGGTCACAGTCTACTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAGATGCCAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAGCCCACAAGAAGTAGTATTGAAAAATGTGACAGAAAAGTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATCAGGATATAATCAGTTTATGGGATGAAAGCCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATGCTACTATCAGTGGTAATGCAACAGAAGAAATAAAAAACTGCTCTTTCAATGTCAATACAAAAATAGGAGGTAAGAAGCAGAAAGAACGTGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAGTACTAATAGGACTAATACCAGCTATAGGTTGATAAGTTGTAACACTTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTGGAGATAAAGAGTTCAATGGAACAGGACTATGTAGAAATGTCAGCACAGTCCAATGTACACATGGAATCAGGCCAGTAGTATCAACTCAATTGCTGTTGAATGGCAGTCTAGCAGAAGGAGAGGTAGTAATTAAATCTGAAAATATCACGAACAATGTTAAAACCATAATAGTACAGCTAAATGAAACTGTATCAATTAATTGTACAAGACCTAGCAACAATACAAGAAGAAGCATACATATGGGACCAGGGAGAGCCTTTTATGCAACAGGAGAAATAATAGGAGATATAAGGAAAGCACAGTGTATCCTGAATAAGACAGACTGGAGTGACACTTTAACAAGGATAGCTAAAAAATTACACAAGCAATTTCATGGACCAATAGCATTTGAGCAATCCTCAGGAGGGGACCCTGAAATTACAATGCACACTTTTAATTGTGGAGGGGAATTTTTCTACTGCAACACATCAGCGTTGTTTAGCGGGACCTGGAATGGTACTGCTTGGACTAATGCTACTTGGGGTAATATTGCAGGTAACAATATCACACTCCAATGCAGAATAAAACAATTTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGAGAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGCAGTAACACAACAAATGGTGGCGAGAATGGGACCCAGATTGGCGAGAATGTGACCCAGATCTTCAGACCTGGAGGAGGGGATATGAGGGACAATTGGAGAAGTGAATTATACAAATATAAAGTAGTAAAAATTGAGCCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTAACATTCGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACACTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTTCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAAAAAATTTGGGGGAACATGACCTGGATGGAGTGGGAGAGAGAAATTGACAATTATACAGACTTAATATACACCTTAATTGAACAATCGCAGAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAGGCTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGTTTAGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTATCATTCCAGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGTTTGCTTAATGCTATAGCTATATCAGTAGCGGAGGGAACAGATAGGATTATAGAAGCAATACAAAGAATTTGTAGAGCTATCTTACACATACCTACAAGGATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8359, - "end": 8451, - "orientation": "forward", - "distance": 0.40625, - "indel_impact": 0, - "protein": "RPASQPRGDPTGPKEPTKKVERETETDPDH", - "nucleotides": "AGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8360, - "end": 8635, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE", - "nucleotides": "GACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8779, - "end": 9405, - "orientation": "forward", - "distance": 0.6323809523809525, - "indel_impact": 0, - "protein": "MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGGGGTCAAAAATGAGGGGATGGGTTGCTGTAAGGGAAAAAATGAGGCGAACTAAGCCAGAAGATGAGCCAGCAGCAAATGGGGTGGGGGCAGCATCTCGAGACTTGGAGAAATATGGCGCACTCACAAGTAGCAATACAGTAGCTACTAATGCTGATTTAGCTTGGCTAGAAGCACAAGAGGAAGAGGAGGTGGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTCAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGGTTCACCACACACAAGGCTATCTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATCAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATAGAGGACCCGGAGAAAGAAGTCTTAATGTGGAAGTTTGACAGCCACCTAGCATTCCGTCACATGGCCCGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092464": [ - { - "region": "gag", - "start": 773, - "end": 2275, - "orientation": "forward", - "distance": 0.1840637450199203, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAGCCAGCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCGCTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTAAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCTCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2068, - "end": 5079, - "orientation": "forward", - "distance": 0.09781312127236585, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAATACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAGAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGTAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAGACTACCCATACAAAGAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGACGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAGAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5024, - "end": 5602, - "orientation": "forward", - "distance": 0.234375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTACTCATCCAAGAATAAGTTCAGAAGTTCACATCCCGCTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAGAAGAGATATAGCACACAAGTAGACCCTGGCTTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGCTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5542, - "end": 5832, - "orientation": "forward", - "distance": 0.2268041237113403, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5813, - "end": 6027, - "orientation": "forward", - "distance": 0.29166666666666674, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5952, - "end": 6027, - "orientation": "forward", - "distance": 0.23076923076923084, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLQTVRLIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6044, - "end": 6289, - "orientation": "forward", - "distance": 0.5697674418604652, - "indel_impact": 0, - "protein": "MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL", - "nucleotides": "ATGCAACCTTTACACATAGCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6207, - "end": 8789, - "orientation": "forward", - "distance": 0.45848416289592775, - "indel_impact": 0, - "protein": "MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATAATACCACTAGTACCAATGATACCACTAGTACCACTACTAGTAGCGGGGAAAAGATGAAGGAAGGAGAGATAAAAAACTGCTCTTTCAATATCACCACAAGCATAAGAGATAAGGTGCAGAAAGAATATGCACTCTTTTATAAACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACGATAAGAAGTTCAATGGAACAGGATCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAATGAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAAGACAATAATCTTTACTCACTCCTCAGGAGGGGACCCAGAAGTTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGATGAAAGGTCAAATGACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGATACATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAACGAGAGCAACACCACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAAAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGGATAACATGACCTGGATGGAATGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACTTTAATTGAAAAATCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8371, - "end": 8463, - "orientation": "forward", - "distance": 0.40625, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGQKESKKKVERETETDPDH", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8372, - "end": 8647, - "orientation": "forward", - "distance": 0.26086956521739135, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8791, - "end": 9411, - "orientation": "forward", - "distance": 0.3058252427184467, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGTAAGTGGTCAAAGAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGATGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGGTATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGGTTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092467": [ - { - "region": "gag", - "start": 808, - "end": 2307, - "orientation": "forward", - "distance": 0.2507968127490039, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAAAAACAATACAAATTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTCTAGAGACATCAGAAGGGTGTAGACAAATACTGGGACAGCTACAACCAGCTCTTCAGACAGGATCAGAAGAATTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAAATAGAGGAAGAGCAAAACCAAAGTAAGAAAAAAGCAGCAGCTGCAGCAGCTGACACAGGAAACAGAAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCTTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAATAGGTGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCAGGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATTCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTGAGAGCCGAGCAAGCAACACAGGAAGTAAAGAATTGGATGACAGAAACTTTGCTGGTCCAAAATGCAAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCAGGCCACAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAACTGTAATGATGCAGAGAGGCAATTTTAGGAATCAAAGAAAGACAGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCATATAGCAAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAAGAAGGGCACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2100, - "end": 5111, - "orientation": "forward", - "distance": 0.14342629482071723, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAAATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGATATGAATTTACCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCCTATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGGATATTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTGGGATCTGACTTAGAAATAGGACAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGATAGCTGGACTGTCAATGACATCCAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACTAAAGCACTAACAGAAGTAGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCAACGAAAGACCTAATAGCAGAACTACAGAAGCAGGGGCAAAGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTGAAACAATTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAAATTACCTATACAAAAAGAAACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGATTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAGATCAGGCAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCCCCCTAACAGACACAACAAACCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCACTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGGAAGATACTATTTTTAGATGGAATAACTAAGGCCCAAGATGATCATGAGAGATACCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCACGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAACTAGATTGTACGCATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTAAAACTAGCAGGAAGATGGCCAGTAAAGACAGTACATACAGATAATGGCAGCAATTTCACCAGTGCTGCGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAAAGTCAAGGAGTCATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAGAAACAAATCACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAGGCAAAGATCATTAGAGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5056, - "end": 5634, - "orientation": "forward", - "distance": 0.44041450777202074, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATACATGTCTCAAAGAAAGCTAAGGGATGGGTTTATAGACACCACTATGAAAGCACCCATCCAAGAATAAGTTCAGAAGTACATATCCCGCTAGGGGAAGCTAGATTAGTAATAGCAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAAGAAATATATCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGCATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCATAGTAGGACGTTTAGTTAGCCCTCAGTGTGAATATCAAACAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGTAGCATTAATAACGCCAAAAAAGAGAAAGCCACCTTTGCCTAGTGTTAGGAAATTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5574, - "end": 5864, - "orientation": "forward", - "distance": 0.28125, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAGAGCTTTTAGAGGAACTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAACTTAGGACAATACATCTATGCAACTTATGGGGATACTTGGACAGGAGTGGAAGCTTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGGAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5845, - "end": 6059, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5984, - "end": 6059, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKQLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6076, - "end": 6339, - "orientation": "forward", - "distance": 0.6090909090909091, - "indel_impact": 0, - "protein": "MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL", - "nucleotides": "ATGCAACCCTTAGTAATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAGAAAGATAGATAGCATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6239, - "end": 8806, - "orientation": "forward", - "distance": 0.5505630630630636, - "indel_impact": 0, - "protein": "MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAGTGCTAAAGAACAATTGTGGGTCACAGTTTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAAATGCTAAAGCATATGACCCAGAGGTGCATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAAGAAGTACCATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGACATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTATTTTAAATTGCACTAATGTGAATGTTACTACTAACAATAATAGTAGTAGTGAGGAACAGATGGAGGTAGGAGAAATAAAAAACTGCTCTTTCAATATTGCCACAAGAATAAAAAATAAGATAAAGAAAGAATATGCACTTTTTAATAGACTTGATGTAGTACCAATAGAGGATGATAATACAAGCTATATGTTGATAAATTGTAATACCTCAGTCACTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATACTAAAATGTAATGATAAAAAGTTCAATGGAACAGGACCATGTAACAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGATAGTAGTTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTGAATAAAACTATAGAAATTAATTGTATAAGACCCAACAATAATACAAGAAAAAGTATATCTTTAAGACCGGGGCAAGCAATTTATGCAACAGAAGACATAATAGGAAATATAAGACAAGCACATTGTAACATTAGGAGAAAAGACTGGGATAAAGCTTTAGAACAGGTAGTTGCAAAATTAAGAGAACAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGAGACCCAGAAATTGTAATGCATAGTTTTAATTGTGCAGGGGAATTTTTCTACTGTAACACAACAAAGCTGTTTAATAGTACTTGGAATGTTAATAACACTCGGAATAATACTACTGATAATAGCACCATCACTCTCCCGTGCAGAATAAAACAAATTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCTCCTCCCATCAAAGGGCAAATTAAATGTTCATCAAATATTACAGGGTTATTATTAACAAGAGATGGTGGTGTCCGCGAGGACAACGCCCCTGAGATCTTTAGACCTGGAGGAGGAGATATGAGGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTGGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAAAGAGGAAAAAGAGCAGTAACGCTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACTTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTACCTTGGAATACTAGTTGGAGTAATAAATCTTTGGAAAAGATTTGGAAAAACATGACCTGGATGGAGTGGGAGAAAGAAATTAACAATTACACAAGGACAATATACACCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAGGAATTATTGGAATTGGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATCATGATAGTAGGAGGTATAGTAGGGTTAAGAATAGTTTTTACTGTGCTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTGTCATTCCAGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAATTGGTAGAGCTATTCTCCACATCCCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8388, - "end": 8480, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPASQLRGDQTGPKEQKKKVERETETDPGN", - "nucleotides": "AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8389, - "end": 8664, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC", - "nucleotides": "GACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8808, - "end": 9416, - "orientation": "forward", - "distance": 0.583333333333333, - "indel_impact": 0, - "protein": "MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAATGTAGTCTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCTCCAGCAGCAGAAGGGGTGGGAGCAGCATCTCGAGACTTGGAAAAACATGGAGCACTCACAACTAGTAATACAGCAGCTAATAATGCTGCTTGTGCCTGGCTGGAAGCACAAGAGGAGGAAGAGGTGGGGTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTATGGATCTTGGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCCAAAAAGACAAGAAATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTATACACCAGGGCCAGGGACTAGATATCCATTAACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATGAAGGAGAGAATAGCTGTTTGCTACACCCGATGAACCAGCATGGGGCAGATGACACAGAAAGAGAAGTATTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAAGGCCCGAGAGCTGCATCCGGAGTACTACAAAAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ] -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-large/defects.json b/tests/expected-results-large/defects.json index 76ba67c..9e26dfe 100644 --- a/tests/expected-results-large/defects.json +++ b/tests/expected-results-large/defects.json @@ -1,1639 +1 @@ -{ - "KX505501.1": [ - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1629-1746 contains out of frame indels that impact 1950 positions.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'pol' exceeds maximum deletion tolerance. Contains 2892 deletions with max tolerance of 93 deletions.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'pol' exceeds maximum distance tolerance. It is 2.13586 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.88345.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'T--'.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'env' at 1747-1746 contains out of frame indels that impact 1714 positions.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'env' exceeds maximum deletion tolerance. Contains 2346 deletions with max tolerance of 54 deletions.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 2.11186 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vif' at 1747-1746 contains out of frame indels that impact 386 positions.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vif' exceeds maximum deletion tolerance. Contains 354 deletions with max tolerance of 12 deletions.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 2.04883 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vpr' at 1747-1746 contains out of frame indels that impact 194 positions.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vpr' exceeds maximum deletion tolerance. Contains 66 deletions with max tolerance of 6 deletions.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 2.00365 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 1747-1746 contains out of frame indels that impact 144 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 69 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'rev_exon1' at 1747-1746 contains out of frame indels that impact 51 positions.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Insertion", - "message": "ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 69 insertions with max tolerance of 0 insertions.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 1747-1746 contains out of frame indels that impact 166 positions.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vpu' exceeds maximum deletion tolerance. Contains 24 deletions with max tolerance of 6 deletions.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 2.044 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 1747-1746 contains out of frame indels that impact 62 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 15 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 1747-1746 contains out of frame indels that impact 184 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 198 deletions with max tolerance of 7 deletions.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'nef' at 1747-1778 contains out of frame indels that impact 425 positions.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 396 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 2.0371 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'AGA'.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'GAC'.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "RevResponseElementDeletion", - "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions.", - "region": null - }, - { - "qseqid": "KX505501.1", - "error": "LongDeletion", - "message": "Query sequence contains a very large deletion.", - "region": null - }, - { - "qseqid": "KX505501.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - } - ], - "MN691959": [ - { - "qseqid": "MN691959", - "error": "Frameshift", - "message": "ORF 'vpu' at 5911-6156 contains out of frame indels that impact 122 positions.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "InternalStop", - "message": "ORF 'vpu' at 5911-6156 contains an internal stop codon at 5974.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.70488 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8238-8330 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN691959", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - } - ], - "MN692074": [ - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'pol' at 2085-4082 contains out of frame indels that impact 676 positions.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'pol' exceeds maximum deletion tolerance. Contains 981 deletions with max tolerance of 93 deletions.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'GAT'.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'env' at 4083-4082 contains out of frame indels that impact 1714 positions.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'env' exceeds maximum deletion tolerance. Contains 2073 deletions with max tolerance of 54 deletions.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 2.05571 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vif' at 4083-4082 contains out of frame indels that impact 386 positions.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'vif' exceeds maximum deletion tolerance. Contains 81 deletions with max tolerance of 12 deletions.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.99787 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vpr' at 4083-4082 contains out of frame indels that impact 194 positions.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'vpr' exceeds maximum insertion tolerance. Contains 207 insertions with max tolerance of 6 insertions.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 1.9951 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 4083-4082 contains out of frame indels that impact 144 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'tat_exon1' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 0 insertions.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'rev_exon1' at 4083-4082 contains out of frame indels that impact 51 positions.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 387 insertions with max tolerance of 0 insertions.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vpu' at 4083-4082 contains out of frame indels that impact 166 positions.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'vpu' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 24 insertions.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 2.00052 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 4083-4082 contains out of frame indels that impact 62 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 60 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 4083-4082 contains out of frame indels that impact 184 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 243 deletions with max tolerance of 7 deletions.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'nef' at 4083-4086 contains out of frame indels that impact 412 positions.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 123 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 2.00372 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ACC'.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'CAG'.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "RevResponseElementDeletion", - "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions.", - "region": null - }, - { - "qseqid": "MN692074", - "error": "LongDeletion", - "message": "Query sequence contains a very large deletion.", - "region": null - } - ], - "MN692145": [], - "MN090335": [ - { - "qseqid": "MN090335", - "error": "Frameshift", - "message": "ORF 'gag' at 482-1665 contains out of frame indels that impact 1230 positions.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "InternalStop", - "message": "ORF 'gag' at 482-1665 contains an internal stop codon at 683.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "MutatedStopCodon", - "message": "ORF 'gag' has a mutated stop codon: 'AAA'.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 7740-7832 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN090335", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN090335", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 42 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: AT. The context is TTAACTGCGAAT-----CGTTC.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MN090376": [ - { - "qseqid": "MN090376", - "error": "Frameshift", - "message": "ORF 'gag' at 541-1590 contains out of frame indels that impact 754 positions.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "InternalStop", - "message": "ORF 'gag' at 541-1590 contains an internal stop codon at 598.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.33506 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'TGC'.", - "region": "gag" - }, - { - "qseqid": "MN090376", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 95 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MN090376", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ----------------------.", - "region": null - }, - { - "qseqid": "MN090376", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MN090376", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115581.1": [], - "MK115690.1": [ - { - "qseqid": "MK115690.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 12 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115690.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: G-. The context is GCGGCGACTGG-----------.", - "region": null - }, - { - "qseqid": "MK115690.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - } - ], - "MK115571.1": [ - { - "qseqid": "MK115571.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 79 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115571.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ----------------------.", - "region": null - } - ], - "MK115514.1": [ - { - "qseqid": "MK115514.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCGACT--------CGAAA.", - "region": null - } - ], - "MK115488.1": [ - { - "qseqid": "MK115488.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCGACT--------CGAAA.", - "region": null - } - ], - "MK115030.1": [ - { - "qseqid": "MK115030.1", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MK115030.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115498.1": [], - "MK115211.1": [ - { - "qseqid": "MK115211.1", - "error": "Frameshift", - "message": "ORF 'env' at 5688-8198 contains out of frame indels that impact 757 positions.", - "region": "env" - }, - { - "qseqid": "MK115211.1", - "error": "InternalStop", - "message": "ORF 'env' at 5688-8198 contains an internal stop codon at 6354.", - "region": "env" - }, - { - "qseqid": "MK115211.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115211.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCG----------------.", - "region": null - }, - { - "qseqid": "MK115211.1", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MK115211.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115158.1": [ - { - "qseqid": "MK115158.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 15 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK115158.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115158.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCGAACG------------.", - "region": null - } - ], - "MK114705.1": [ - { - "qseqid": "MK114705.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8551-9118 contains out of frame indels that impact 122 positions.", - "region": "nef" - }, - { - "qseqid": "MK114705.1", - "error": "InternalStop", - "message": "ORF 'nef' at 8551-9118 contains an internal stop codon at 8983.", - "region": "nef" - }, - { - "qseqid": "MK114705.1", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'ACG'.", - "region": "nef" - } - ], - "MK114856.1": [ - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'gag' at 493-2022 contains out of frame indels that impact 828 positions.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'gag' at 493-2022 contains an internal stop codon at 538.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.871 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1788-4826 contains out of frame indels that impact 1998 positions.", - "region": "pol" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1788-4826 contains an internal stop codon at 1917.", - "region": "pol" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'env' at 5954-8521 contains out of frame indels that impact 1453 positions.", - "region": "env" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'env' at 5954-8521 contains an internal stop codon at 6056.", - "region": "env" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.89012 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'vif' at 4771-5349 contains out of frame indels that impact 328 positions.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'vif' at 4771-5349 contains an internal stop codon at 4882.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.92813 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'vif' has a mutated start codon: 'ATA'.", - "region": "vif" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'vpr' at 5289-5579 contains out of frame indels that impact 143 positions.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'vpr' at 5289-5579 contains an internal stop codon at 5340.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 1.70619 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'vpr' has a mutated start codon: 'ATA'.", - "region": "vpr" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5560-5774 contains out of frame indels that impact 123 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK114856.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 186 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 5791-6039 contains out of frame indels that impact 122 positions.", - "region": "vpu" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 5791-6039 contains an internal stop codon at 5857.", - "region": "vpu" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.77195 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8103-8195 contains out of frame indels that impact 37 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 8104-8379 contains out of frame indels that impact 69 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 8104-8379 contains an internal stop codon at 8161.", - "region": "rev_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 0.84783 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "MK114856.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8523-9176 contains out of frame indels that impact 403 positions.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "InternalStop", - "message": "ORF 'nef' at 8523-9176 contains an internal stop codon at 8724.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 1.95 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ATA'.", - "region": "nef" - }, - { - "qseqid": "MK114856.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 3.639064030015132e-65).", - "region": null - } - ], - "MK115009.1": [ - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'gag' at 303-1821 contains out of frame indels that impact 844 positions.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'gag' at 303-1821 contains an internal stop codon at 348.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.8444 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStopCodon", - "message": "ORF 'gag' has a mutated stop codon: 'AAA'.", - "region": "gag" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1614-4625 contains out of frame indels that impact 1728 positions.", - "region": "pol" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1614-4625 contains an internal stop codon at 1713.", - "region": "pol" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'env' at 5753-8353 contains out of frame indels that impact 1437 positions.", - "region": "env" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'env' at 5753-8353 contains an internal stop codon at 5849.", - "region": "env" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.87886 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'vif' at 4570-5148 contains out of frame indels that impact 166 positions.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'vif' at 4570-5148 contains an internal stop codon at 4630.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.99021 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStartCodon", - "message": "ORF 'vif' has a mutated start codon: 'ATA'.", - "region": "vif" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'vpr' at 5088-5378 contains out of frame indels that impact 88 positions.", - "region": "vpr" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'vpr' at 5088-5378 contains an internal stop codon at 5247.", - "region": "vpr" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 1.28021 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5359-5573 contains out of frame indels that impact 35 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115009.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 54 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 5590-5835 contains an internal stop codon at 5815.", - "region": "vpu" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 7936-8211 contains out of frame indels that impact 70 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 7936-8211 contains an internal stop codon at 7993.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 0.78261 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115009.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8355-8996 contains out of frame indels that impact 395 positions.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "InternalStop", - "message": "ORF 'nef' at 8355-8996 contains an internal stop codon at 8796.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 1.70049 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ATA'.", - "region": "nef" - }, - { - "qseqid": "MK115009.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 1.2040967664149076e-41).", - "region": null - }, - { - "qseqid": "MK115009.1", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MK115009.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115387.1": [ - { - "qseqid": "MK115387.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 7870-7962 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK115387.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - } - ], - "MK115491.1": [], - "MK116110.1": [ - { - "qseqid": "MK116110.1", - "error": "Frameshift", - "message": "ORF 'gag' at 118-1601 contains out of frame indels that impact 973 positions.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "InternalStop", - "message": "ORF 'gag' at 118-1601 contains an internal stop codon at 235.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.73094 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'GCT'.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "MutatedStopCodon", - "message": "ORF 'gag' has a mutated stop codon: 'AAA'.", - "region": "gag" - }, - { - "qseqid": "MK116110.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 22 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK116110.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: CC. The context is CCCCCCACTCCCTGACATGCTG.", - "region": null - } - ], - "MK115527.1": [], - "MK114997.1": [ - { - "qseqid": "MK114997.1", - "error": "Frameshift", - "message": "ORF 'env' at 5651-8207 contains out of frame indels that impact 1167 positions.", - "region": "env" - }, - { - "qseqid": "MK114997.1", - "error": "InternalStop", - "message": "ORF 'env' at 5651-8207 contains an internal stop codon at 6695.", - "region": "env" - }, - { - "qseqid": "MK114997.1", - "error": "MutatedStopCodon", - "message": "ORF 'env' has a mutated stop codon: 'AGA'.", - "region": "env" - } - ], - "MK115518.1": [], - "MK115065.1": [ - { - "qseqid": "MK115065.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 75 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115065.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ----------------------.", - "region": null - }, - { - "qseqid": "MK115065.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - }, - { - "qseqid": "MK115065.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115464.1": [ - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'gag' at 795-2297 contains out of frame indels that impact 637 positions.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'gag' at 795-2297 contains an internal stop codon at 1257.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.495 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'pol' at 2090-5101 contains out of frame indels that impact 1716 positions.", - "region": "pol" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'pol' at 2090-5101 contains an internal stop codon at 2189.", - "region": "pol" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'env' at 6229-8799 contains out of frame indels that impact 1449 positions.", - "region": "env" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'env' at 6229-8799 contains an internal stop codon at 6430.", - "region": "env" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.83161 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'vif' at 5046-5624 contains out of frame indels that impact 248 positions.", - "region": "vif" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'vif' at 5046-5624 contains an internal stop codon at 5253.", - "region": "vif" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.49427 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5835-6049 contains out of frame indels that impact 129 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115464.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 195 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115464.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 6066-6311 contains out of frame indels that impact 124 positions.", - "region": "vpu" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 6066-6311 contains an internal stop codon at 6132.", - "region": "vpu" - }, - { - "qseqid": "MK115464.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.88171 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8381-8473 contains out of frame indels that impact 37 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 8382-8657 contains out of frame indels that impact 45 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 8382-8657 contains an internal stop codon at 8439.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115464.1", - "error": "Frameshift", - "message": "ORF 'nef' at 8801-9451 contains out of frame indels that impact 178 positions.", - "region": "nef" - }, - { - "qseqid": "MK115464.1", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 234 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "MK115464.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ATA'.", - "region": "nef" - }, - { - "qseqid": "MK115464.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 4.937891251407691e-23).", - "region": null - } - ], - "MK115530.1": [], - "MK115520.1": [ - { - "qseqid": "MK115520.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1988-5004 contains out of frame indels that impact 1225 positions.", - "region": "pol" - }, - { - "qseqid": "MK115520.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1988-5004 contains an internal stop codon at 3188.", - "region": "pol" - }, - { - "qseqid": "MK115520.1", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'AGA'.", - "region": "pol" - }, - { - "qseqid": "MK115520.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: GA. The context is CTGGTAACTAGAGATCGAAAGT.", - "region": null - }, - { - "qseqid": "MK115520.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115503.1": [], - "MK115570.1": [ - { - "qseqid": "MK115570.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 11 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115570.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCG-----------CGAAA.", - "region": null - } - ], - "MK115509.1": [], - "MK115702.1": [ - { - "qseqid": "MK115702.1", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 54 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MK115702.1", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ---------------------A.", - "region": null - }, - { - "qseqid": "MK115702.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - }, - { - "qseqid": "MK115702.1", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ], - "MK115095.1": [ - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'gag' at 189-1697 contains out of frame indels that impact 806 positions.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'gag' at 189-1697 contains an internal stop codon at 234.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'gag' exceeds maximum distance tolerance. It is 1.8384 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.98443.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1490-4501 contains out of frame indels that impact 1865 positions.", - "region": "pol" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'pol' at 1490-4501 contains an internal stop codon at 1589.", - "region": "pol" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'env' at 5629-8229 contains out of frame indels that impact 1426 positions.", - "region": "env" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'env' at 5629-8229 contains an internal stop codon at 5725.", - "region": "env" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 1.87353 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'vif' at 4446-5024 contains out of frame indels that impact 332 positions.", - "region": "vif" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'vif' at 4446-5024 contains an internal stop codon at 4557.", - "region": "vif" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.85469 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 5235-5449 contains out of frame indels that impact 35 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115095.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 54 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115095.1", - "error": "MutatedStartCodon", - "message": "ORF 'tat_exon1' has a mutated start codon: 'ATA'.", - "region": "tat_exon1" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 5466-5711 contains out of frame indels that impact 122 positions.", - "region": "vpu" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'vpu' at 5466-5711 contains an internal stop codon at 5532.", - "region": "vpu" - }, - { - "qseqid": "MK115095.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.88171 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MK115095.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 7812-8087 contains out of frame indels that impact 69 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115095.1", - "error": "InternalStop", - "message": "ORF 'rev_exon2' at 7812-8087 contains an internal stop codon at 7869.", - "region": "rev_exon2" - }, - { - "qseqid": "MK115095.1", - "error": "APOBECHypermutation", - "message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 8.33506116803153e-40).", - "region": null - } - ], - "MK115490.1": [], - "MK115576.1": [], - "OQ092466": [ - { - "qseqid": "OQ092466", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: GC. The context is GCGGCGACTGGCGAGTACGCCA.", - "region": null - } - ], - "OQ092463": [ - { - "qseqid": "OQ092463", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 26 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092463", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCC-.", - "region": null - } - ], - "OQ092465": [ - { - "qseqid": "OQ092465", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: GA. The context is GCGGCGACTGGAGAGTACGCCT.", - "region": null - } - ], - "OQ092462": [ - { - "qseqid": "OQ092462", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 23 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092462", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCCA.", - "region": null - } - ], - "OQ092464": [ - { - "qseqid": "OQ092464", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 19 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092464", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is GCGGCG----------------.", - "region": null - } - ], - "OQ092467": [ - { - "qseqid": "OQ092467", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 22 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "OQ092467", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a missing splice donor site: --. The context is ------------GAGTACGCCA.", - "region": null - } - ] -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-large/holistic.json b/tests/expected-results-large/holistic.json index 5e89642..9e26dfe 100644 --- a/tests/expected-results-large/holistic.json +++ b/tests/expected-results-large/holistic.json @@ -1,576 +1 @@ -{ - "KX505501.1": { - "intact": false, - "qlen": 1997, - "hypermutation_probablility": 0.7087072014754221, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 1997, - "blast_sseq_coverage": 0.2498456472525211, - "blast_qseq_coverage": 1.2158237356034052, - "blast_sseq_orfs_coverage": 0.17666166916541728, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MN691959": { - "intact": false, - "qlen": 9493, - "hypermutation_probablility": 0.19667690182893238, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9493, - "blast_sseq_coverage": 1.0817040543321672, - "blast_qseq_coverage": 1.1086063415148004, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN692074": { - "intact": false, - "qlen": 4178, - "hypermutation_probablility": 0.36378645339477633, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 4178, - "blast_sseq_coverage": 0.5042189750977567, - "blast_qseq_coverage": 1.1728099569171853, - "blast_sseq_orfs_coverage": 0.411544227886057, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MN692145": { - "intact": true, - "qlen": 9689, - "hypermutation_probablility": 0.1672411051048176, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9689, - "blast_sseq_coverage": 1.130479522535501, - "blast_qseq_coverage": 1.1271545051088863, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN090335": { - "intact": false, - "qlen": 9069, - "hypermutation_probablility": 0.1771850809736527, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9069, - "blast_sseq_coverage": 0.9842560197571517, - "blast_qseq_coverage": 1.0603153600176425, - "blast_sseq_orfs_coverage": 1.000374812593703, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN090376": { - "intact": false, - "qlen": 8985, - "hypermutation_probablility": 0.026415767987601813, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 8985, - "blast_sseq_coverage": 0.9784935171846059, - "blast_qseq_coverage": 1.0604340567612687, - "blast_sseq_orfs_coverage": 0.9943778110944528, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115581.1": { - "intact": true, - "qlen": 9495, - "hypermutation_probablility": 0.6919440876652894, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9495, - "blast_sseq_coverage": 0.987034369211772, - "blast_qseq_coverage": 1.0046340179041602, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115690.1": { - "intact": false, - "qlen": 9689, - "hypermutation_probablility": 0.051230576250981485, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9689, - "blast_sseq_coverage": 1.004630582424367, - "blast_qseq_coverage": 1.0113530808132933, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115571.1": { - "intact": false, - "qlen": 9394, - "hypermutation_probablility": 0.8029570594372466, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9394, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0113902490951672, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115514.1": { - "intact": false, - "qlen": 9382, - "hypermutation_probablility": 0.6482462132632603, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9382, - "blast_sseq_coverage": 0.9864169582218564, - "blast_qseq_coverage": 1.0173736943082499, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115488.1": { - "intact": false, - "qlen": 9623, - "hypermutation_probablility": 0.6534999185838631, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9623, - "blast_sseq_coverage": 1.0255196542498457, - "blast_qseq_coverage": 1.0325262392185388, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 6, - "is_reverse_complement": false - }, - "MK115030.1": { - "intact": false, - "qlen": 9126, - "hypermutation_probablility": 0.032014462397289556, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9126, - "blast_sseq_coverage": 0.994031693764149, - "blast_qseq_coverage": 1.0655270655270654, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115498.1": { - "intact": true, - "qlen": 9461, - "hypermutation_probablility": 0.83547963060225, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9461, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0080329774865235, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115211.1": { - "intact": false, - "qlen": 9032, - "hypermutation_probablility": 0.11818291879607423, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9032, - "blast_sseq_coverage": 0.995060712080675, - "blast_qseq_coverage": 1.0598981399468557, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115158.1": { - "intact": false, - "qlen": 9143, - "hypermutation_probablility": 0.002572269807584293, - "inferred_subtype": "Ref.47_BF.ES.08.P1942.GQ372987", - "blast_matched_qlen": 9143, - "blast_sseq_coverage": 0.9921577414295317, - "blast_qseq_coverage": 0.9699223449633599, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 234, - "orfs_end": 8211, - "blast_n_conseqs": 1, - "is_reverse_complement": false - }, - "MK114705.1": { - "intact": false, - "qlen": 9411, - "hypermutation_probablility": 0.14584270737492833, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9411, - "blast_sseq_coverage": 1.098065445564931, - "blast_qseq_coverage": 1.122622463075125, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 6, - "is_reverse_complement": false - }, - "MK114856.1": { - "intact": false, - "qlen": 9477, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9477, - "blast_sseq_coverage": 1.0485696645400289, - "blast_qseq_coverage": 1.0812493405085997, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MK115009.1": { - "intact": false, - "qlen": 9207, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9207, - "blast_sseq_coverage": 0.9965013377238114, - "blast_qseq_coverage": 1.0590854784403172, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115387.1": { - "intact": false, - "qlen": 9136, - "hypermutation_probablility": 0.5436355526687852, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9136, - "blast_sseq_coverage": 0.9796254373327845, - "blast_qseq_coverage": 1.040936952714536, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115491.1": { - "intact": true, - "qlen": 9422, - "hypermutation_probablility": 0.8961809048805741, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9422, - "blast_sseq_coverage": 1.0037044659394938, - "blast_qseq_coverage": 1.0299299511780937, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK116110.1": { - "intact": false, - "qlen": 8967, - "hypermutation_probablility": 0.07021438897893317, - "inferred_subtype": "Ref.B.TH.90.BK132.AY173951", - "blast_matched_qlen": 8967, - "blast_sseq_coverage": 0.9957759004001778, - "blast_qseq_coverage": 0.9972119995539199, - "blast_sseq_orfs_coverage": 0.9986220719027934, - "orfs_start": 140, - "orfs_end": 8123, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115527.1": { - "intact": true, - "qlen": 9481, - "hypermutation_probablility": 0.770862998910788, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9481, - "blast_sseq_coverage": 0.9867256637168141, - "blast_qseq_coverage": 1.0056956017297753, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK114997.1": { - "intact": false, - "qlen": 9055, - "hypermutation_probablility": 0.05560625344150194, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9055, - "blast_sseq_coverage": 0.9784935171846059, - "blast_qseq_coverage": 1.0516841524019878, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115518.1": { - "intact": true, - "qlen": 9537, - "hypermutation_probablility": 0.6408152618300496, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9537, - "blast_sseq_coverage": 0.9847705289154147, - "blast_qseq_coverage": 0.9996854356715948, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115065.1": { - "intact": false, - "qlen": 9214, - "hypermutation_probablility": 0.033954952452739495, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9214, - "blast_sseq_coverage": 1.0080263428689031, - "blast_qseq_coverage": 1.069459518124593, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 6, - "is_reverse_complement": false - }, - "MK115464.1": { - "intact": false, - "qlen": 9663, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9663, - "blast_sseq_coverage": 0.9838444124305412, - "blast_qseq_coverage": 0.9893407844354756, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115530.1": { - "intact": true, - "qlen": 9544, - "hypermutation_probablility": 0.5812621948015355, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9544, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 0.9992665549036044, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115520.1": { - "intact": false, - "qlen": 9589, - "hypermutation_probablility": 0.5225247969864292, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9589, - "blast_sseq_coverage": 0.9786993208479111, - "blast_qseq_coverage": 0.987902805297737, - "blast_sseq_orfs_coverage": 1.012118940529735, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115503.1": { - "intact": true, - "qlen": 9617, - "hypermutation_probablility": 0.42870398270204335, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9617, - "blast_sseq_coverage": 0.987034369211772, - "blast_qseq_coverage": 0.9953207861079338, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115570.1": { - "intact": false, - "qlen": 9485, - "hypermutation_probablility": 0.7406166892211931, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9485, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0057986294148655, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115509.1": { - "intact": true, - "qlen": 9353, - "hypermutation_probablility": 0.7883768413537747, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9353, - "blast_sseq_coverage": 0.9866227618851615, - "blast_qseq_coverage": 1.0197797498128942, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115702.1": { - "intact": false, - "qlen": 9098, - "hypermutation_probablility": 0.1454272422215308, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9098, - "blast_sseq_coverage": 0.9874459765383824, - "blast_qseq_coverage": 1.0596834469114091, - "blast_sseq_orfs_coverage": 1.0198650674662668, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MK115095.1": { - "intact": false, - "qlen": 9137, - "hypermutation_probablility": 1.0, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9137, - "blast_sseq_coverage": 0.9907388351512657, - "blast_qseq_coverage": 1.060085367188355, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "MK115490.1": { - "intact": true, - "qlen": 9347, - "hypermutation_probablility": 0.8875203448314265, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9347, - "blast_sseq_coverage": 0.9848734307470673, - "blast_qseq_coverage": 1.0204343639670483, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MK115576.1": { - "intact": true, - "qlen": 9266, - "hypermutation_probablility": 0.8197662757563093, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9266, - "blast_sseq_coverage": 0.9917678534677917, - "blast_qseq_coverage": 1.0342110943233327, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092466": { - "intact": false, - "qlen": 9686, - "hypermutation_probablility": 0.3858261890626, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9686, - "blast_sseq_coverage": 1.1015641078411196, - "blast_qseq_coverage": 1.1192442700805285, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092463": { - "intact": false, - "qlen": 9605, - "hypermutation_probablility": 0.21628713708846803, - "inferred_subtype": "Ref.B.TH.90.BK132.AY173951", - "blast_matched_qlen": 9605, - "blast_sseq_coverage": 1.0529124055135617, - "blast_qseq_coverage": 0.9884435190005205, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 140, - "orfs_end": 8123, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "OQ092465": { - "intact": false, - "qlen": 9659, - "hypermutation_probablility": 0.02412789935966586, - "inferred_subtype": "Ref.28_BF.BR.99.BREPM12817.DQ085874", - "blast_matched_qlen": 9659, - "blast_sseq_coverage": 1.0429696287964005, - "blast_qseq_coverage": 0.9620043482762191, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 200, - "orfs_end": 8207, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "OQ092462": { - "intact": false, - "qlen": 9714, - "hypermutation_probablility": 0.10883643311676816, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9714, - "blast_sseq_coverage": 1.1306853261988064, - "blast_qseq_coverage": 1.1301214741610048, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "OQ092464": { - "intact": false, - "qlen": 9556, - "hypermutation_probablility": 0.006887768010151674, - "inferred_subtype": "Ref.28_BF.BR.99.BREPM12817.DQ085874", - "blast_matched_qlen": 9556, - "blast_sseq_coverage": 1.0427446569178853, - "blast_qseq_coverage": 0.9678735872750105, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 200, - "orfs_end": 8207, - "blast_n_conseqs": 2, - "is_reverse_complement": false - }, - "OQ092467": { - "intact": false, - "qlen": 9936, - "hypermutation_probablility": 0.6438715160567257, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9936, - "blast_sseq_coverage": 1.1308911298621116, - "blast_qseq_coverage": 1.0962157809983897, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - } -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-large/regions.json b/tests/expected-results-large/regions.json index 1de7062..9e26dfe 100644 --- a/tests/expected-results-large/regions.json +++ b/tests/expected-results-large/regions.json @@ -1,6398 +1 @@ -{ - "KX505501.1": [ - { - "region": "gag", - "start": 336, - "end": 1745, - "orientation": "forward", - "distance": 0.41298449612403343, - "indel_impact": 69, - "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1628, - "end": 1745, - "orientation": "forward", - "distance": 2.1358565737051802, - "indel_impact": 1950, - "protein": "FCRENLAFPQGKAGEFPSEQTRANSPTSRELQVWGRDTN", - "nucleotides": "TTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "env", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.1118604651162807, - "indel_impact": 1714, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "vif", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.0488262910798123, - "indel_impact": 386, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.0036496350364965, - "indel_impact": 194, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.218055555555555, - "indel_impact": 144, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT", - "nucleotides": "", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.2499999999999996, - "indel_impact": 51, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT", - "nucleotides": "", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.044, - "indel_impact": 166, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "tat_exon2", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.2419354838709675, - "indel_impact": 62, - "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.214130434782609, - "indel_impact": 184, - "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 1746, - "end": 1777, - "orientation": "forward", - "distance": 2.03710407239819, - "indel_impact": 425, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "AGATGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN691959": [ - { - "region": "gag", - "start": 639, - "end": 2141, - "orientation": "forward", - "distance": 0.05400000000000005, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1934, - "end": 4945, - "orientation": "forward", - "distance": 0.03585657370517925, - "indel_impact": 0, - "protein": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4890, - "end": 5468, - "orientation": "forward", - "distance": 0.0625, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5408, - "end": 5698, - "orientation": "forward", - "distance": 0.0625, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5679, - "end": 5893, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5818, - "end": 5893, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5910, - "end": 6155, - "orientation": "forward", - "distance": 1.704878048780488, - "indel_impact": 122, - "protein": "MQPIQIAIVALVVAIIIAIVV", - "nucleotides": "ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6070, - "end": 8655, - "orientation": "forward", - "distance": 0.10139372822299642, - "indel_impact": 0, - "protein": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL", - "nucleotides": "ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8237, - "end": 8329, - "orientation": "forward", - "distance": 0.19354838709677424, - "indel_impact": 32, - "protein": "RPTSQTRGDPTGPKE", - "nucleotides": "AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8238, - "end": 8513, - "orientation": "forward", - "distance": 0.16304347826086962, - "indel_impact": 0, - "protein": "DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE", - "nucleotides": "GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8657, - "end": 9277, - "orientation": "forward", - "distance": 0.043689320388349495, - "indel_impact": 0, - "protein": "MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN692074": [ - { - "region": "gag", - "start": 789, - "end": 2291, - "orientation": "forward", - "distance": 0.14990059642147102, - "indel_impact": 0, - "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2084, - "end": 4081, - "orientation": "forward", - "distance": 0.8050695825049854, - "indel_impact": 676, - "protein": "FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGAT", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "env", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.055707762557078, - "indel_impact": 1714, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "vif", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 1.997872340425532, - "indel_impact": 386, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 1.9950980392156863, - "indel_impact": 194, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.218055555555555, - "indel_impact": 144, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD", - "nucleotides": "", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.2499999999999996, - "indel_impact": 51, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD", - "nucleotides": "", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.0005208333333333, - "indel_impact": 166, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "tat_exon2", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.2419354838709675, - "indel_impact": 62, - "protein": "TQWRALRCCI", - "nucleotides": "", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.214130434782609, - "indel_impact": 184, - "protein": "TQWRALRCCI", - "nucleotides": "", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 4082, - "end": 4085, - "orientation": "forward", - "distance": 2.0037162162162163, - "indel_impact": 412, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "ACCC", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN692145": [ - { - "region": "gag", - "start": 775, - "end": 2280, - "orientation": "forward", - "distance": 0.16267465069860276, - "indel_impact": 0, - "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2070, - "end": 5084, - "orientation": "forward", - "distance": 0.10796019900497522, - "indel_impact": 0, - "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5029, - "end": 5607, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5547, - "end": 5837, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5818, - "end": 6032, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKX", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5957, - "end": 6032, - "orientation": "forward", - "distance": 0.34615384615384626, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6049, - "end": 6297, - "orientation": "forward", - "distance": 0.6733333333333333, - "indel_impact": 0, - "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", - "nucleotides": "ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6212, - "end": 8782, - "orientation": "forward", - "distance": 0.5647651006711409, - "indel_impact": 0, - "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8364, - "end": 8456, - "orientation": "forward", - "distance": 0.3870967741935485, - "indel_impact": 0, - "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8365, - "end": 8640, - "orientation": "forward", - "distance": 0.26086956521739135, - "indel_impact": 0, - "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", - "nucleotides": "GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8784, - "end": 9386, - "orientation": "forward", - "distance": 0.40765550239234427, - "indel_impact": 0, - "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN090335": [ - { - "region": "gag", - "start": 481, - "end": 1664, - "orientation": "forward", - "distance": 0.9171874999999998, - "indel_impact": 1230, - "protein": "MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ", - "nucleotides": "ATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1427, - "end": 4468, - "orientation": "forward", - "distance": 0.23952802359881997, - "indel_impact": 0, - "protein": "FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4413, - "end": 4991, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4931, - "end": 5221, - "orientation": "forward", - "distance": 0.38144329896907214, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5202, - "end": 5416, - "orientation": "forward", - "distance": 0.41666666666666674, - "indel_impact": 0, - "protein": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKX", - "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5341, - "end": 5416, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5433, - "end": 5681, - "orientation": "forward", - "distance": 0.6837209302325582, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL", - "nucleotides": "ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5596, - "end": 8157, - "orientation": "forward", - "distance": 0.6589887640449441, - "indel_impact": 31, - "protein": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ", - "nucleotides": "ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7739, - "end": 7831, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 32, - "protein": "RPSSQPRGDQTGPKE", - "nucleotides": "AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7740, - "end": 8015, - "orientation": "forward", - "distance": 0.4565217391304348, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE", - "nucleotides": "GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8159, - "end": 8812, - "orientation": "forward", - "distance": 0.5935483870967742, - "indel_impact": 0, - "protein": "MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN090376": [ - { - "region": "gag", - "start": 540, - "end": 1589, - "orientation": "forward", - "distance": 1.3350597609561752, - "indel_impact": 754, - "protein": "MYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", - "nucleotides": "TGCTACTGTATTAAATAATGATTTAAGTTCCTCTGATCCTGTCTGAAGTGCTGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCCAGTTCCCTGCTTGCCCATACTATATGTTTTAACTTATATCTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATTTGTCTAATTCTCCCCCGCTTAATACCGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAAGGCGTACTCACCGTTCGCCGCCCCTCGCCTCTTGCTGTGCGCGCTTCAGCAAGCCGAGTCCGATAATTCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1382, - "end": 4393, - "orientation": "forward", - "distance": 0.1952380952380952, - "indel_impact": 0, - "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTGACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGATGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCTGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTCGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4338, - "end": 4919, - "orientation": "forward", - "distance": 0.4578680203045684, - "indel_impact": 0, - "protein": "MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAGCATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGACTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAAAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4859, - "end": 5149, - "orientation": "forward", - "distance": 0.35051546391752586, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", - "nucleotides": "ATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCTTGGCTTCATGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5130, - "end": 5344, - "orientation": "forward", - "distance": 0.5416666666666667, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5269, - "end": 5344, - "orientation": "forward", - "distance": 0.7517241379310344, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLRIAGTIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5361, - "end": 5630, - "orientation": "forward", - "distance": 0.8152173913043479, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL", - "nucleotides": "ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5524, - "end": 8109, - "orientation": "forward", - "distance": 0.6166294642857151, - "indel_impact": 0, - "protein": "MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAGGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAGCCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTAACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTAACATCAATAGCACTAATATAAACAATACCAATAGTATAGAAAGAGAAATGACAAACTGCTCTTTTAATGTCACCACAGTCATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAAACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGCTATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTAAAAATGTTAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAACACAGAAGTAAATATTATCACACTCCCATGCAAGATAAGGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACATTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7691, - "end": 7783, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", - "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7692, - "end": 7967, - "orientation": "forward", - "distance": 0.4565217391304348, - "indel_impact": 0, - "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", - "nucleotides": "GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8111, - "end": 8734, - "orientation": "forward", - "distance": 0.6291866028708133, - "indel_impact": 0, - "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", - "nucleotides": "ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACATCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115581.1": [ - { - "region": "gag", - "start": 680, - "end": 2179, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1972, - "end": 4983, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4928, - "end": 5506, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5446, - "end": 5736, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5717, - "end": 5931, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5856, - "end": 5931, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5948, - "end": 6193, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6111, - "end": 8651, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8233, - "end": 8325, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8234, - "end": 8509, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8653, - "end": 9267, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115690.1": [ - { - "region": "gag", - "start": 777, - "end": 2285, - "orientation": "forward", - "distance": 0.19661354581673307, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAGTGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAGATTAAAGCATATCGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATAATGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTGTATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGGCGCAGGAAACAGCAGTCAGACCAGCACCAGCCAAAATTACCCTATAGTACAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTCTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACTAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCCACCAGCATTCTAGACATAAGACAAGGACCAAAGGAGCCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTAGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAGGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGTCATAAAGCAAGAGTTTTAGCGGAAGCAATGAGCCAAGCAACAAATTCAGCTGCCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAAAGTGTTAAGTGTTTTAATTGCGGCAAAGATGGGCACATAGCAAAAAATTGCAGGGCCCCTAGAAGAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTCCAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2078, - "end": 5089, - "orientation": "forward", - "distance": 0.14129353233830844, - "indel_impact": 0, - "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGCACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATAGGGCCTGAGAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGACTTCAGAGAACTAAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGACAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAACCTTATAGAAAACAAAATCCAAACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACCTAGAAATAGGGCAGCATAGAATAAAAATAGAAGAACTGAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGACAAGTGGACAGTACAGCCTATAGCGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATCTACCCAGGAATTAAAGTAAGGCAATTATGTAAACTACTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAATTAGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGAGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTGACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAATCAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTATCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACATGCAATTTATCTAGCTTTGCAAGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTGTTTTTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAGCAGCATACTTTATTTTAAAATTAGCAGGACGATGGCCAGTAGCAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACAGTTAAGGCCGCCTGCTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAACAATGAATTGAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACGTAATAGCAACAGACTTACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAACATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5034, - "end": 5612, - "orientation": "forward", - "distance": 0.4093264248704662, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAAGATGGCACAGTTTAGTAAAACACCATATATATATTTCAGGGAAAGCTAGAGGATGGGTTTATAAACATCACTATGAAAACACTCATCCAAGAATAAGTTCAAAAGTATACATCCCACTAGGGGAAGCTAGACTGGCAGTAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGGAATATAGCACACAAGTAGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGTCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTTTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTATTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5552, - "end": 5842, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACACATCTATGAGACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAAGTCTGCAACAACTGCTGTTCATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGAATTACTCCACAGAGGAGAACAAGAAATGGAGCCAGTAGATCCTAA", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5823, - "end": 6037, - "orientation": "forward", - "distance": 0.41666666666666674, - "indel_impact": 0, - "protein": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5962, - "end": 6037, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDNDEDLLKTVRFIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6054, - "end": 6299, - "orientation": "forward", - "distance": 0.5848837209302327, - "indel_impact": 0, - "protein": "MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL", - "nucleotides": "ATGCAATCTTTAGCAATATTAGCAATAGTAGCATTAGTAGTAGCAGCCATAATAGCAATAGTTGTGTGGACCATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGATAGTGGCAATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6217, - "end": 8799, - "orientation": "forward", - "distance": 0.6138702460850114, - "indel_impact": 31, - "protein": "MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ", - "nucleotides": "ATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCTACAGACCCCAACCCACAAGAAATAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTACATTGCACTAAGTTGGAGATTAATAGCACTAAGAAGACTAATAGCACTAATAATGGTACTAACATCAATGCCACTGATGATAGTTGGGGGGAAATGAAAAACTGCTCTTTCAATACCACTGCAAGCATAAGAGATAAGGTACAGAGAGAATTTGCGCTTTTTTATAAACTTGATATAGTACCAATAGATAATGATGATATCAACTATAGGTTAATAAGTTGTAACACCTCAGTCCTTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAAAATTTCAATGGAACAGGACAATGTAAAAATGTCAGCACAGTGCAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTCAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAAATCTGAAAATATCACAGACAATACTAAAACTATAATAGTACAGCTGAATGCATCTGTAGCAATTGTTTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGGCCAGGGAGAGCGTTTTATGCAGCAGGAGATATAATAGGAGACATAAGACGAGCACACTGTATCCTTAACAAAACAACATGGGATAACACAATAGAACAGGTAGCTAAAAAATTAAGAGAACAATTTGAGAATAAGACAATAGTCTTTAGTGAATCCTCGGGAGGGGACCCAGAAATTACAATGATTAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAGTACAACTGTTTAATAGTACTTGGCATAATAATGGGAGTAGTACTACAGGGTCAAGTAGCAGTGAAGGCAATATCACACTCCCATGCAAAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACCAATTAGCTGCGAGTCAAATATTACAGGGTTGCTACTAACAAGAGATGGTGGGAATGACGCTAACGGGAACAACACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGCGAAGTGAATTATATAGATACAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCACAGAGAAGAGTGGTACAGAGAGAAAAAAGAGCAGTGGGTCTCGGAGCCTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGCTTTGGGGTTGCTCTGGAAAACTCATCTGCAACACTGCTGTGCCTTGGAATACTAGTTGGAGTAACAAATCTCTGGATGATATTTGGCATAACATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAACATAATATACAGCTTAATTGAGGCATCGCAAACCCAGCAAGAAAAGAATGAACAAGAATTACTAGAATTAGACAAATGGGCAAGTCTGTGGAATTGGTTTAGCATATCAAACTGGCTGTGGTACATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTATACTTTCTATAGTGAATAGAGTTAGGAAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAGTGCTGTTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGAGTTATAGAAGGATTGCGCAGAGCTTTTAGAGCTATTATCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGAGCTTTGCAATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8381, - "end": 8473, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKEPETKVESKTETDPLT", - "nucleotides": "AGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8382, - "end": 8657, - "orientation": "forward", - "distance": 0.4623655913978495, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE", - "nucleotides": "GACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8801, - "end": 9434, - "orientation": "forward", - "distance": 0.5274038461538462, - "indel_impact": 0, - "protein": "MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN", - "nucleotides": "ATGGGTGGTAAATGGTCAAAATGTAGTATAGTTGGATGGCCTACTGTAAGGGAAAGAATAAGACGAGCAGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAAGCATGGAGCAATCACAAGTAGCAATGCTAACAATGCTGATTGTACCTGGCTGGAAGCCCAAAAAGAAGAGGAGGAGGTAGGCTTTCCAGTCAGGCCTCAGGTACCCTTAAGACCAATGACTTACAAGGCAGCCTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATAATTTACTCCCAAAAAAGACAAGATATTCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACTAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGAGAGGGTAGAAGAGGAGAATAAAAGAGAGAACCGCTGCTTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGCTACAGTGGAGGTTTGACAGCCGCCTAGCCTTTCACCACGTAGCCAGAGAGCTGCATCCGGAGTACTATAAGAACTAGAACTGCTGACATCTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115571.1": [ - { - "region": "gag", - "start": 579, - "end": 2078, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1871, - "end": 4882, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4827, - "end": 5405, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5345, - "end": 5635, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5616, - "end": 5830, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5755, - "end": 5830, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5847, - "end": 6092, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6010, - "end": 8550, - "orientation": "forward", - "distance": 0.5251716247139588, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8132, - "end": 8224, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8133, - "end": 8408, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8552, - "end": 9166, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115514.1": [ - { - "region": "gag", - "start": 584, - "end": 2083, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1876, - "end": 4887, - "orientation": "forward", - "distance": 0.14811133200795235, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4832, - "end": 5410, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5350, - "end": 5640, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5621, - "end": 5835, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5760, - "end": 5835, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5852, - "end": 6097, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6015, - "end": 8555, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8137, - "end": 8229, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8138, - "end": 8413, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8557, - "end": 9171, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115488.1": [ - { - "region": "gag", - "start": 707, - "end": 2206, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1999, - "end": 5010, - "orientation": "forward", - "distance": 0.14811133200795235, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4955, - "end": 5533, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5473, - "end": 5763, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5744, - "end": 5958, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5883, - "end": 5958, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5975, - "end": 6220, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6138, - "end": 8678, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8260, - "end": 8352, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8261, - "end": 8536, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8680, - "end": 9294, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115030.1": [ - { - "region": "gag", - "start": 176, - "end": 1684, - "orientation": "forward", - "distance": 0.2328685258964145, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1477, - "end": 4488, - "orientation": "forward", - "distance": 0.22266401590457252, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4433, - "end": 5011, - "orientation": "forward", - "distance": 0.3969072164948453, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4951, - "end": 5241, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGACATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5222, - "end": 5436, - "orientation": "forward", - "distance": 0.5945945945945945, - "indel_impact": 0, - "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5361, - "end": 5436, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5453, - "end": 5698, - "orientation": "forward", - "distance": 0.8160919540229887, - "indel_impact": 0, - "protein": "MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM", - "nucleotides": "ATGCATATCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5616, - "end": 8216, - "orientation": "forward", - "distance": 0.6431111111111119, - "indel_impact": 0, - "protein": "MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAGAAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAATACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGAAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAACCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7798, - "end": 7890, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7799, - "end": 8074, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8218, - "end": 8859, - "orientation": "forward", - "distance": 0.7375565610859729, - "indel_impact": 0, - "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", - "nucleotides": "ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATACTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115498.1": [ - { - "region": "gag", - "start": 663, - "end": 2162, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1955, - "end": 4966, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4911, - "end": 5489, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5429, - "end": 5719, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5700, - "end": 5914, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5839, - "end": 5914, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5931, - "end": 6176, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6094, - "end": 8634, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8216, - "end": 8308, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8217, - "end": 8492, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8636, - "end": 9250, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115211.1": [ - { - "region": "gag", - "start": 250, - "end": 1752, - "orientation": "forward", - "distance": 0.20813492063492078, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACACAGGACATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCATTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAGCCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAAACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAAATGCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1545, - "end": 4556, - "orientation": "forward", - "distance": 0.2109018830525271, - "indel_impact": 0, - "protein": "FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAGAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCTGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATTTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTACCACCTGTAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGGCAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGTAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4501, - "end": 5082, - "orientation": "forward", - "distance": 0.4215384615384614, - "indel_impact": 0, - "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5022, - "end": 5312, - "orientation": "forward", - "distance": 0.38144329896907214, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", - "nucleotides": "ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAACTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGGGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5293, - "end": 5507, - "orientation": "forward", - "distance": 0.547945205479452, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5432, - "end": 5507, - "orientation": "forward", - "distance": 0.7037037037037037, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLRITRTIKFLYQNX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5524, - "end": 5793, - "orientation": "forward", - "distance": 0.847826086956522, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL", - "nucleotides": "ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTATTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5687, - "end": 8197, - "orientation": "forward", - "distance": 1.2241972477064227, - "indel_impact": 757, - "protein": "MHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACCCAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTACTATAAACAATACCAGTAGTATAGAAGAAGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCGCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTAGGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACACTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGTAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7779, - "end": 7871, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", - "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7780, - "end": 8055, - "orientation": "forward", - "distance": 0.4565217391304348, - "indel_impact": 0, - "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", - "nucleotides": "GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8199, - "end": 8822, - "orientation": "forward", - "distance": 0.6435406698564594, - "indel_impact": 0, - "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC", - "nucleotides": "ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACCCAGATAAAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115158.1": [ - { - "region": "gag", - "start": 316, - "end": 1818, - "orientation": "forward", - "distance": 0.3747011952191237, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACCCAGGAAATAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACGATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAACCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA", - "subtype_start": 234, - "subtype_end": 1730, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKTYKLKHIVWASRELERFALNPGLLETSEGCRQILGQLQPALQTGSEELRSLFNAVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKVQQAADTGNNNQVSQNYPIVQNAQGHMIHQPISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGCTPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIQWMTSNPPVPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKVLRAEQATQDVKNWMTETLLVQNSNPDCKTILKALGPQATLEEMMTACQGVGGPGHKARVLAEAMSQATASNVIMMQKGNYRGQRKIVKCFNCGREGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSSKGRPGNFLQNRPEPTAPPAESLGFGEETTPSPKQEQKEGLYPPLASLRSLFGNDP*SQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAACATATAAATTGAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCTCTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATTTAATGCAGTAGCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGATAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCTGACACAGGAAATAACAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACGCCCAGGGGCACATGATACATCAGCCTATTTCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAAAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGATGCACCCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGCTAAAAGACACCATTAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACTACCAGTACCCTTCAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGATATCTATAAAAGATGGATCATCCTAGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGAACCCTTTAGAGATTATGTGGATAGGTTCTTTAAAGTCCTAAGAGCCGAGCAAGCTACACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCACAGGCTACACTAGAAGAAATGATGACAGCATGCCAAGGAGTGGGAGGGCCCGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGCAACAGCTTCAAATGTCATAATGATGCAGAAAGGCAATTATAGGGGCCAGAGAAAGATTGTCAAGTGTTTCAATTGTGGCAGAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAGTGTGGAAAAGAAGGACACCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAAAATTTGGCCTTCCAGCAAGGGGAGGCCAGGGAATTTTCTCCAGAACAGGCCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTGGGGTTTGGAGAGGAGACAACCCCCTCTCCGAAGCAGGAACAGAAAGAGGGACTGTATCCTCCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTAGTCACAGTAA" - }, - { - "region": "pol", - "start": 1611, - "end": 4622, - "orientation": "forward", - "distance": 0.27623762376237626, - "indel_impact": 0, - "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTCTACCAGGAAGATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATGGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTGCATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCTCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACTTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAGGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA", - "subtype_start": 1526, - "subtype_end": 4534, - "subtype_aminoacids": "FFRENLAFQQGEAREFSPEQARANSPASRELGVWRGDNPLSEAGTERGTVSSLSFPQITLWQRPLVTVKVGGQMKEALLDTGADDTVLEDINLPGKWKPKMIGGIGGFIKVRQYDQIIIEICGKKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLYEEFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLQEKDNWTVNDIQKLVGKLNWASQIYPGIKVRQLCRLLRGTKALTDIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARSRGAHTNDVRQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTNKGRQKVVSLTDTTNQKTELQAILLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLSWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESINKELKKIIGQIRDQAEHLKTAVQMAVFIHNFKRRGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAAATTTGGCCTTCCAGCAAGGGGAGGCCAGGGAATTTTCTCCAGAACAGGCCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTGGGGTTTGGAGAGGAGACAACCCCCTCTCCGAAGCAGGAACAGAAAGAGGGACTGTATCCTCCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTAGTCACAGTAAAAGTAGGGGGACAGATGAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATAATCATAGAAATTTGTGGAAAGAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTGAAGCCAGGAATGGATGGCCCAAAGGTCAAACAATGGCCATTAACGGAAGAAAAAATAAAAGCATTAATAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACAAAATGGAGAAAATTAGTAGATTTTAGAGAACTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGATTAAAAAAGAAAAAATCAGTAACAGTATTAGATGTGGGAGATGCATATTTTTCAGTTCCCTTATATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAAAGCAGCATGACGAAAATCTTAGAGCCTTTTAGAAAACAGAATCCAGACATAGTGATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTACTGAAATGGGGGCTTACTACACCAGACAAAAAACATCAGAAAGAACCTCCCTTCCTTTGGATGGGTTATGAACTACATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCAAGAAAAGGACAACTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGACAATTATGCAGACTCCTTAGGGGAACCAAGGCACTAACAGACATAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGGGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATCTATCAGGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGGTCAAGGGGTGCCCACACTAATGATGTAAGACAGTTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGGAAAACTCCTAAATTTAGACTACCCATACAAAGAGAGACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCCCCTCTAGTAAAGTTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAAAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACCAATCAGAAGACTGAGTTACAAGCAATCCTTCTAGCTTTACAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAATTAGTCAATCAAATAATAGAGCAATTAATAAACAAGGAAAAGGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGGATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAATAATTGGAGGGCAATGGCCAGTGATTTTAACATCCCACCTGTGGTAGCAAAAGAGATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTGGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAGGAATTAAAGAAAATTATAGGACAGATAAGAGATCAGGCTGAACATCTTAAGACAGCAGTGCAAATGGCAGTATTCATCCACAATTTTAAAAGAAGAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCAGTGGTCATACAAGACAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATAATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4567, - "end": 5148, - "orientation": "forward", - "distance": 0.5815384615384613, - "indel_impact": 0, - "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGTTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACGGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 4479, - "subtype_end": 5060, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRISKWKSLVKYHIHTSKKAKKWFYRHHFESRHPKISSKVHIPLEEENKLVVTTYWGLNTGERDWHLGQGVSIEWRQGKYRTQIDPGLADQLIHIYYFDCFSESAIRKAILGHRVSPRCNYQAGHNKVGSLQYLALTALIATKKAKPPLPSVQKLVXDRWNKPQKTKGHRESHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGCAAATGGAAAAGTTTAGTTAAATACCATATACATACTTCAAAGAAAGCCAAAAAATGGTTCTATCGACATCACTTTGAAAGCAGGCATCCAAAAATAAGCTCAAAAGTACACATCCCWCTAGAGGAAGAAAATAAATTAGTAGTAACAACATATTGGGGTCTGAATACAGGGGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGCAGGGGAAGTATAGGACACAAATAGACCCTGGCCTGGCAGACCAACTGATTCATATATATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAAAGCCATATTAGGACATAGAGTTAGCCCTAGGTGTAACTATCAAGCAGGACATAACAAGGTAGGATCTCTACAATATTTGGCACTAACAGCATTAATAGCTACAAAGAAGGCAAAGCCGCCTTTGCCTAGTGTCCAGAAATTAGTARRAGACAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5088, - "end": 5378, - "orientation": "forward", - "distance": 0.46875, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", - "nucleotides": "ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTGGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5000, - "subtype_end": 5290, - "subtype_aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYNTYGDTWEGVEAIIRMLQHLLFIHFRIGCNHSKIGIIRQRRTRNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAAAATGAAGCTGTCAGACATTTTCCTAGGCCATGGCTCCATGGCTTAGGACAACATATCTACAACACCTATGGGGATACTTGGGAGGGAGTTGAAGCTATAATAAGGATGCTGCAACATCTACTGTTTATCCATTTCAGAATTGGGTGCAATCATAGCAAAATAGGCATTATTCGACAGAGAAGAACAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5359, - "end": 5573, - "orientation": "forward", - "distance": 0.5, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5271, - "subtype_end": 5485, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPRTACTKCYCKRCCFHCQVCFITKGLGISYGRKKRRQRRXPPQSGQTHQDSLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAGGACTGCGTGTACCAAATGTTATTGTAAGAGATGTTGCTTTCATTGCCAAGTTTGCTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAARACCTCCTCAAAGCGGTCAGACTCATCAAGATTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5498, - "end": 5573, - "orientation": "forward", - "distance": 0.6923076923076923, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKITRTIKFLYQNX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACA", - "subtype_start": 5410, - "subtype_end": 5485, - "subtype_aminoacids": "MAGRSGDSDEBLLKAVRLIKILYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAARACCTCCTCAAAGCGGTCAGACTCATCAAGATTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5590, - "end": 5859, - "orientation": "forward", - "distance": 0.720430107526882, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL", - "nucleotides": "ATGCAATCCTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTTGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA", - "subtype_start": 5502, - "subtype_end": 5747, - "subtype_aminoacids": "MQSLEILAIVALVVAAILAIVVWSIVLIEYRKILRERKVYKLIDRIRERAEDSGNESEGDQEELSAMVERGHLAPWDINDL*", - "subtype_nucleotides": "ATGCAATCTTTAGAGATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGAGAAAGAAAAGTATATAAATTAATTGACAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGAGATCAAGAAGAATTATCAGCAATGGTGGAAAGGGGGCATCTTGCTCCTTGGGATATTAATGATCTGTAA" - }, - { - "region": "env", - "start": 5753, - "end": 8314, - "orientation": "forward", - "distance": 0.6398200224971886, - "indel_impact": 0, - "protein": "MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTGAAAATGTTAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGGGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTATATCTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 5665, - "subtype_end": 8211, - "subtype_aminoacids": "MRAREIKKNYQQWWKGGILLLGILMICNAEKSEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEMLLKNVTENFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCNNTVTTNASMNNSGEMKNCSFNITTQTRGRKREYALFYNLDVVQLESDKTSTSYRLISCNTSVITQACPKISFEPIPIHYCAPAGFAILKCNDKQFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEEDIIIRSQNISDNAKSIIVQLNESISINCIRPGNNTRKSIHMGPGKVFYATGDIIGNIRQAHCNISKAKWNNTLRQIARKLGEQFNNKTIVFNQSSGGDPEIVMHTFNCGGEFFYCNTTSLFNSTWENDTNITEESNSSDDTITLQCKIKQIINLWQEVGKAMYAPPISGYINCSSNITGLILVRDGGNNRTSESETFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQRQKRAVGFGALFLGFLGAAGSTMGAASVALTVQARLLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQIMGIWGCSGKYICTTAVPWNTSWSNKSYDQIWKNMTWMQWEKEIDNYTSEIYSLIALSQDQQEKNEQELLELDKWASLWNWFDISNWLWYIKIFIMIVGGLVGLRIVFAILSIVNRVRQGYSPLSFQTHHPAPRGPDRPGGIEEEGGERDRDRSGRSASGFLTLIWIDLRSLCLFSYHHLRDLLLIAARIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNTTAIVVAEGTDRIIEALQSAGRAVLHIPRRIRQGLERALL*", - "subtype_nucleotides": "ATGAGAGCGAGGGAGATCAAGAAGAATTATCAGCAATGGTGGAAAGGGGGCATCTTGCTCCTTGGGATATTAATGATCTGTAATGCTGAAAAGTCTGAAAAGTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAGGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGTTATTGAAAAATGTGACAGAAAATTTTAACATGTGGAAAAACAACATGGTAGATCAGATGCATGAAGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACACCACTTTGTGTTACCTTAAATTGCAATAATACTGTCACCACTAATGCTAGCATGAATAATAGTGGAGAAATGAAAAATTGCTCTTTCAATATCACCACCCAAACGAGAGGGAGAAAGAGAGAATATGCACTTTTTTATAACCTTGATGTAGTGCAATTAGAATCAGACAAAACTAGTACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCTTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGCAGTTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAATTGCTGTTAAATGGCAGCCTAGCAGAAGAAGATATAATAATCAGGTCTCAAAATATCTCAGATAATGCAAAAAGCATAATAGTACAGTTGAATGAATCTATAAGCATTAATTGTATAAGACCCGGCAATAATACAAGAAAAAGTATACATATGGGACCAGGCAAGGTATTTTATGCAACAGGAGATATAATAGGAAATATAAGACAAGCACATTGTAACATTAGTAAAGCAAAATGGAATAACACTTTAAGACAGATAGCCAGAAAATTAGGAGAACAATTTAACAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGGGACCCAGAAATTGTAATGCATACTTTTAACTGTGGAGGGGAATTTTTCTACTGTAATACAACATCACTGTTTAATAGTACATGGGAGAATGATACAAATATTACTGAAGAATCAAATAGCTCAGATGACACAATCACACTCCAATGCAAAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGATACATTAACTGTTCATCAAATATCACAGGGCTGATATTAGTAAGAGATGGTGGTAATAACAGAACAAGTGAGAGTGAGACCTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGACAAAAAAGAGCAGTGGGATTTGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGGCGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGATCATGGGGATTTGGGGTTGCTCTGGAAAATACATCTGCACCACTGCTGTGCCTTGGAATACTAGCTGGAGTAATAAATCTTATGATCAGATTTGGAAGAACATGACCTGGATGCAGTGGGAAAAAGAAATTGATAATTACACAAGTGAAATATACAGCTTAATTGCACTATCGCAAGACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGACAAATGGGCAAGCTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGATTAAGAATAGTTTTTGCAATACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTCCAGACCCACCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCGATCGGCGAGCGGATTCTTAACACTTATCTGGATCGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATACTGGTGGAACCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCCATTAGCTTGCTTAATACCACAGCAATAGTAGTAGCTGAGGGGACAGATAGAATTATAGAAGCTTTGCAAAGTGCTGGTAGAGCTGTTCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTACTATAA" - }, - { - "region": "tat_exon2", - "start": 7896, - "end": 7988, - "orientation": "forward", - "distance": 0.6774193548387097, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", - "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG", - "subtype_start": 7793, - "subtype_end": 7885, - "subtype_aminoacids": "RPTTQLRGDPTGPEESKKKVERETETDPVDR", - "subtype_nucleotides": "AGACCCACCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCGATCGG" - }, - { - "region": "rev_exon2", - "start": 7897, - "end": 8172, - "orientation": "forward", - "distance": 0.5217391304347827, - "indel_impact": 0, - "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE", - "nucleotides": "GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 7794, - "subtype_end": 8069, - "subtype_aminoacids": "DPPPSSEGTRQARRNRRRRWRERQRQIRSIGERILNTYLDRPTEPVPLQLPPLERLTLDCSEDCGTSGTQGVGTPQILVEPPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCGATCGGCGAGCGGATTCTTAACACTTATCTGGATCGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATACTGGTGGAACCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8316, - "end": 8939, - "orientation": "forward", - "distance": 0.6866028708133971, - "indel_impact": 0, - "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", - "nucleotides": "ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8213, - "subtype_end": 8833, - "subtype_aminoacids": "MGNKWSKSSIVGWPTIRERIRRTPPIAEGVGAVSRDLGKHGAITSSNTAANNPDLAWLEAQEGEEVGFPVRPQVPLRPMTYKGAFDLSFFLKEKGGLEGLIYSRKRQEILDLWVYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVDPEEVEKANEGENNCLLHPMSQHGMEDEDREVLMWKFDRHLASKHVARELHPEYYKDC*", - "subtype_nucleotides": "ATGGGAAACAAGTGGTCAAAAAGTAGTATAGTTGGATGGCCTACTATAAGGGAAAGAATAAGACGAACCCCTCCAATAGCAGAAGGGGTGGGAGCAGTCTCTCGAGACCTAGGAAAGCATGGAGCAATCACAAGTAGCAACACAGCAGCTAATAATCCTGACTTGGCCTGGCTGGAAGCACAGGAGGGTGAGGAAGTAGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTTTCGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATTCCAGGAAAAGACAAGAGATCCTTGATCTATGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGGTATCCATTGACCTTTGGGTGGTGCTTCAAGCTAGTACCAGTTGACCCAGAGGAGGTAGAAAAGGCCAATGAAGGAGAAAACAACTGCTTGCTACACCCCATGAGCCAACATGGAATGGAGGATGAAGACAGAGAAGTACTGATGTGGAAGTTTGACAGACACCTAGCATCTAAGCACGTAGCCCGAGAGCTACATCCGGAGTATTACAAGGACTGCTGA" - } - ], - "MK114705.1": [ - { - "region": "gag", - "start": 532, - "end": 2046, - "orientation": "forward", - "distance": 0.23247524752475246, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGCGAATTAGATAGATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCGGTTAATCCTGGCCTGTTAGAAACATCAGAGGGCTGTAGGCAAATACTGGGACAGCTACAACCGTCCCTTCAAACAGGATCAGAAGAACTTAAATCATTATTTAATACAATAGCAGTCCTTTATTGCGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTCTAAATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGTCAGAGCAGTCAAGTCAGCCAAAATTACCCTATAGTGCAGAACCATCAGGGGCAAATGGTATATCAGGCTCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCCGAGGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAGGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGGACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCATGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTATCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCTTTTAGAGATTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAAGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTCTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCAGCCACAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGCAACAGGTGGTGCAACTAACATAATGATGCAGAAAGGCAATTTTAGGAACCAAGGAAAACCTATTAAGTGTTTCAATTGTGGCAAAGAAGGGCACCTAGCTAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCTCTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1839, - "end": 4850, - "orientation": "forward", - "distance": 0.17412935323383083, - "indel_impact": 0, - "protein": "FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTAAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAACCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCCGTATTTGCCATAAAGAAAAAGGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAGCTTAATAAAAGAACTCAAGACTTTTGGGAGGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTCTCAGTTCCTTTAGATGAAAGCTTCAGAAAGTATACTGCATTTACCATACCTAGTACTAACAATGAGACACCCGGGATTAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAAAGTAGCATGACGAAAATCTTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTACCAATACATGGATGATTTATACGTAGGATCTGACTTAGAAATAGAGCAGCATAGAGCAAAAGTAGAGAACCTGAGAGAGCATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGCTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAGCTAAGGCACTAACAGAAGTGATAACACTAACAGAAGAAGCAGAGCTAGAATTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAGTAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACCTATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAAGCAGTTCAAAAAATAGCCACAGAGAGCATAGTAGTATGGGGAAAGATTCCTAAATTTAGATTACCCATACAGAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAGTACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGGCAAAAAGTTATCCCCTTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGACAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAGAAGGTCTACCTGACATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCAGGAATCAGGAAAATACTATTTTTGGATGGAATAGATAAGGCCCAGGAAGATCATGAGAAATATCATAGTAATTGGAAAGCAATGGCTAGTGATTTTAACATACCACCTGTGGTAGCAAAAGAGATAATAGCCAGCTGTGATAAATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGGTACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAATAATACATACAGATAATGGTAGCAATTTCACCAGCACTACAGTCAAGGCCGCCTGCTGGTGGGCAGGTGTTAAGCAGGAGTTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGAGTACTGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATCAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4795, - "end": 5373, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTCTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGGTTTACAGACACCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAACACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATATGTACTATTTTGATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTGCAATACCTGGCACTAGCAGTATTAGTAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTCGCGAGACTGACAGAGGATAGATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5313, - "end": 5597, - "orientation": "forward", - "distance": 0.40312499999999996, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS", - "nucleotides": "ATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAGCTTAAGAGGGAAGCTGTTAGACATTTTCCTAGGGAATGGCTCCATAGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGGCAGGAGTAGGAGCCATAATAAGAATACTGCAACAATTACTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGGATAGGCATACTGAGGAGAACAAGAAATGGAGCCCGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5578, - "end": 5792, - "orientation": "forward", - "distance": 0.655263157894737, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKX", - "nucleotides": "ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5717, - "end": 5792, - "orientation": "forward", - "distance": 0.8076923076923077, - "indel_impact": 0, - "protein": "MAGRSGDRDEDLLETVRFIKFLYQNX", - "nucleotides": "ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5809, - "end": 6054, - "orientation": "forward", - "distance": 0.8310344827586209, - "indel_impact": 0, - "protein": "MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL", - "nucleotides": "ATGCAACCTTTAGAGATATCAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTACTCTTAGAGTATAGGAAAATATTAAGGCAAAAGAAAATAGACAGATTAATTAATAGAATAAGTGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5972, - "end": 8548, - "orientation": "forward", - "distance": 0.6253647586980922, - "indel_impact": 0, - "protein": "MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL", - "nucleotides": "ATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAGTGCTACAAACATGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGATGCAACCACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGATACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCGACCCACAAGAAGTAGTACTGGAAAATGTGACAGAAAATTATAATATGGGAAAAAATAACATGGTGGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTACTCTTAACCCCATTCTGTGTCACTTTAAATTGCACTGATGCTAACATCACCAGCACTAATAATAGTAGAGATAAGAAGGAAGGAGAAAGTACATTGGAGGAGACGAAAGGAGAAATAAAAAACTGCTCTTTCAATATGACTTCAAGCATGAGCGATAAGTCTCAGAAACAACGTGCACTTTTTTATAAGCTTGATGTGGTACAAATAGATGAGACTAATAATAATAGTTATAGGTTGATAAGTTGTAACACCTCAGTCGTCACACAGGCTTGTCCAAAGGTATCCTTTGATCCAATCCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGAAATTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAACCTGTAGTGTCAACCCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAAGTAATGATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTGCAGCTGAAGACACCTGTACAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGGATAAGTATGGGACCAGGGAGAGTAATTTATGCAACAGGACAAATAATAGGAGATATAAGAAAAGCACATTGCAACATTAGTAGAGCAGAATGGAATACAACTTTAAAGCAGATAGTTACACAATTAAGAAAGCAGTGGAATAGAACCATAATCTTTAACTCATCCTCAGGAGGGGACCCAGAAATTGTGATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACAAAACTATTTAATAGTACTTGGCCACGTAATAGTACTTGGAATAATACTGAAGGGTCAAATGACACTGAAATAATCACACTCCCGTGCAGAATAAAACAAATTGTAAACAGGTGGCAGGAAGTAGGCAAAGCAATGTATGCCCCTCCCATCCAAGGACAAATTAGTTGTTCATCAAATATTACAGGGCTGCTACTAGTTAGAGATGGTGGAATTAACACCAGTGAGAGCAACGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAGGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATGCTGGGAGCTATGTTCCTTGGGCTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGTTGACGGTACAGACCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGACTCCTAGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAACACTAGTTGGAGTAATAGATCTTATGAAGATATTTGGAACAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAGGCTTAATATACACCTTAATTGAAAAATCGCAGAACCAGCAGGAAATAAATGAACAAGAACTATTGTCATTGGATAAGTGGGCAAGCCTGTGGAATTGGTTTAATATAACAAATTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAGTGCTATCAGCTTGCTCAACGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGAATCATAGGAGTAGTACAAAGAACTTGGAGAGCTTTTATCCACATACCTAGGAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8130, - "end": 8222, - "orientation": "forward", - "distance": 0.9303030303030304, - "indel_impact": 0, - "protein": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD", - "nucleotides": "AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8131, - "end": 8406, - "orientation": "forward", - "distance": 0.5591397849462365, - "indel_impact": 0, - "protein": "DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE", - "nucleotides": "GACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8550, - "end": 9117, - "orientation": "forward", - "distance": 0.9368932038834918, - "indel_impact": 122, - "protein": "MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK", - "nucleotides": "ATGGGTGGAAAATGGTCAAAAAAGAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAAAATGAAGCGAACTGAGCCAGCAGCAGAGGGGGTGGGAGCAGCATCTCGAGACCTGGACAAATATGGAGCAATCACAAGTAGCAATACAGCACAGACCAATCCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAAGAGGTAGGCTTTCCAGTCAGACCCCAGGTACCTTTGAGACCAATGACTTACAAGGCAGCTGTGGATATGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAGAGACAAGATATCCTTGATCTGTGGATCTATCACACACAAGGCTACTTCCCTGATTGGCAAAATTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGAGGGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCATTTTAGTCAGCGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGTAGAACCAGAGGAGATCTCTCGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK114856.1": [ - { - "region": "gag", - "start": 492, - "end": 2021, - "orientation": "forward", - "distance": 1.871000000000001, - "indel_impact": 828, - "protein": "MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC", - "nucleotides": "ATAGGTGCGAGAGCGTCAGTATTGAGCAGAGGAGAATTAGATAGATAGGAGAAAATTCAGTTAAGGCCAAGGAGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAAGGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGGAGGCTGTAAACAGATATTAGAACAGCTACAACCATCCCTTCAGACAAGATCAGAAGAACTTAGATCATTATATAATACAGTAGCCACCCTCTATTATGTACATCAAAAGATAGATGTAAAAGACACCAAAGAAGCGTTAGACAAAGTAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCGGCAGCTGACACAAGAAACAGAGGCCAGACCAGTCAAAATTTCCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAGGCCATATCACCTAGAACTTTAAATGCATAAGTAAAAGTAGTAGAAGAAAAAGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACTATGCTAAACACAGTAAGTAGACACCAAGCAGCTATGCAAATGTTAAAAAAGGTCATCAATGAAGAAGCTGCAGAATAAGATAGATTACATCCAGTGCATGCAAGGCCTATTGCACCAGGCCAGATAAGAGAACCAAAAAGAAGTGACATAGCAAGAACTACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAAGAAAGATTTATAAAAGATAAATAATTCTAGGACTAAATAAAATAGTAAAAATGTATAGCCCTACCAGCATTTTAGACATAAAGCAAAGGCCAAAAGAACCCTTTAGAGACTATGTAGACCAGTTCTATAAAACTTTAAGAGCCAAGCAAGCTACACAGAAAGTAAAAAATTAGATGACAAAAACCTTGTTAGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAAAAAGTAAGAAGACCCGGCCATAAAGCAAAAGTTTTAGCTGAAGCAATGAGCCAAGCAACAGGTGCAGCCAACATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAAAGCACATAGCCAAAAATTGCAAGGCCCCTAAGAAAAAAGGCTGTTAGAAATATAGAAAAGAAAGACACCAAATGAAAAATTGCACTAAGAGACAGGCTAATTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1787, - "end": 4825, - "orientation": "forward", - "distance": 0.5422287390029328, - "indel_impact": 1998, - "protein": "ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI", - "nucleotides": "TTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAAGAGAGCAACTAAAGAAAGCTTTATTAAATACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAAGAAAATAGAAACCAAAAATGATAAGAAGAATTAGAAGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCAGACACAAAGCTATAAGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTAGAAGAAATCTGTTGACTCAGCTTAGTTGCACTCTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAAGAATGGACGGCCCAAAAGTTAAACAATAGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATAGAAAAAGAAAAGAAAATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATAGAAAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGATTTCTAAGAAATTCAATTAAGTATACCACATCCTGCAAAGCTAAAAAAGAAAAAATCAGTCACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAAGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAAAGATTAGATATCAGTATAATGTGCTTCCACAAAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTAGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAAGGCAACATAGAACAAAAGTAAAGGAACTGAGGCAACATCTAATGAGGTAAAGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTAGATGAGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAAGAAAGTTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAATTGAATTAGGCAAGTCAGATTTATGCAAAGATTAAAGTGAAGCAATTATGTAAGCTCCTTAAAAGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAAAGATTCTAAAAGATCCAGTACATAGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAAAGAGAAGGTCAGTGGACATATCAAATTTATCAAAAGCCATTTAAAAATCTAAAAACAGAGAAATATGCAAGAACGAAAGGTGCCCATACTAATGATGTAAAGCAATTAACAGAAGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATAAAGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATAAGAAACATAGTGGACAGATTATTGGCAAGCCACCTAGATCCCTAAGTAAGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATAGTACCAGTTAGAAAAAGAACCCATAATAAGAGCAGAAACCTTCTATGTAGATAAGGCAGCTAATAAAGATAATAAATCAAGAAAAGCAAGATATGTTACTGACAGAAGAAGACAAAAAGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAAGATTCAGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAAAGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGAAAAAAGTCTACCTGGCATAAGTGCCAGCCCACAAAAGAATTAAAAGAAATGAACAGGTAAATAAACTAGTCAGTGCTAGAATCAAGAAAGTACTATTTTTAGATAAAATAGAAAAAGCCCAAGAAGACCATAAAAAATATCACAGTAATTAAAGAACAATGGCTAGTAATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAAAAGAAGCTATGCATAGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAAGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAAGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTAGTAGGCAAAAATCAAGCAAGAATTTAGTATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAAAATCTATAAATAATGAATTAAAGAAAATTATAAGACAAGTAAAAGATCAGGCTAAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGAAGGATACAGTGCAGAGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCAGGTTTATTACAAGGACAGCAGAGATCCACTTTAGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAAAGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAAGGATTATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4770, - "end": 5348, - "orientation": "forward", - "distance": 1.928125, - "indel_impact": 328, - "protein": "MIVWQVDRMKIRTWKSLVKYHMYISKKAKK", - "nucleotides": "ATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAGAAATAGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGGTGCTAGATTAGTAATAACAACATATTAAGGTCTGCATACAGGAGAAAAAGACTGGCATTTAGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5288, - "end": 5578, - "orientation": "forward", - "distance": 1.7061855670103092, - "indel_impact": 143, - "protein": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS", - "nucleotides": "ATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAAACCTAAGACAATATATCTATGAAACTTATAAAGATACTTGGACAAGAGTAGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5559, - "end": 5773, - "orientation": "forward", - "distance": 0.5, - "indel_impact": 123, - "protein": "MKLIKILGQE", - "nucleotides": "ATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5698, - "end": 5773, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5790, - "end": 6038, - "orientation": "forward", - "distance": 1.7719512195121947, - "indel_impact": 122, - "protein": "MQPLKILAIVALVVAAIIAIVV", - "nucleotides": "ATGCAACCTTTAAAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGACCATAGTAGGCATAAAATATAAGAAAATATTAAGACAAAGAAAAATAGATAGAATAATTAATAGAATAAGAAAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5953, - "end": 8520, - "orientation": "forward", - "distance": 1.890116279069764, - "indel_impact": 1453, - "protein": "MTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD", - "nucleotides": "ATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTAAGTCACAGTCTATTATGAGGTACCTGTGTAAAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTAGAAAAATAACATGGTAGACCAGATGCATGAGGATATAATCAATTTATGAGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAAAAATAATACTGTAGGAAATCAAACAAATTATCATCTCAATGAAACTAATACAATACAAAGAAAAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAATATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAAAGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTAGTTTTGCAATTCTAAAGTGTAAAGATGAGATGTTCAATAGAACAAGACCATGTAAGAATGTCAGCACAGTACAATGTACACATAGAATTAGACCAGTAGTGTCAACTCAACTGCTGTTAAATAGTAGCCTAGCAGAAAAAAAGATAGTACTTAGATCTGAAAATTTCACAGACAATACTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAAAGAGAGCAATTTATGCAACAAGACAGATAATAGAAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGAAGTGACACTTTAAGCAAAATAGTTGAAAAATTAAAGGAAAAATTTAGAAAAAATAAAACAATAATCTTTAAGCAATCATCAAGAGAGGACATAGAAATTGAAACGCACAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGAAGTGTTAATAGAACTAGCATAAACAGAACTAACAATAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATCAACAGGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCTATCAGTAAGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATAGTAGTACAACTAATAGTAAAGAAGAGACCTTCAGACCTAGAGAAAGAAATATGAAGGACAATTAGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAAAAGTAGCACCCACCAAGGCACAAAGAAAAGTAGTGCAGAGAGAAAAAAGAGCAATAAGAACGTTAGGAGCTATGTTCCTCAGGTTCTTAAGAACAGCAGGAAGCACTATAGGCGCAGCGTCACTGACGCTGACAGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTAAGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTAGAAAGATACCTACAAGATCAACAGCTCCTGAAGATTTGAAGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATACTAGTTAGAGCAATAAATCTTACAGTACCATCTAAGATAACATGACCTAGATGCAGTAGGACAGAGAAATTCAAAATTACACAAAGATAATATACAACTTACTTAAAGAATCGCAAATCCAACAGAAAAAGAATGAAAAAGAATTATTAGAACTAGATCAATGAGCAAATTTGTAGAATTAGTTTAGTATAACAAAATGGCTATAGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAAGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAAAGAACAGATAAAGTTATAGAAGTAAGACAAAAAATTAGCAGAGCTTTTCTCCACATACCTAGAAAGATAAGACAAGGCTTAGAAAAGGCTTTGCAATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8102, - "end": 8194, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 37, - "protein": "RPSSQPQEDQTGPKE", - "nucleotides": "AGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8103, - "end": 8378, - "orientation": "forward", - "distance": 0.8478260869565217, - "indel_impact": 69, - "protein": "ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE", - "nucleotides": "GACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8522, - "end": 9175, - "orientation": "forward", - "distance": 1.9499999999999997, - "indel_impact": 403, - "protein": "MSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", - "nucleotides": "ATAAGTGGCAAGTAGTCAAAAAGTTGTATGGCTAGATAGCCTGCTGTAAAAGAAAGAATAGAAAGAGTTAATCCAAGGCCTGCTGCAAAGAAAGAACAAGCTGAGCCAGCAGCAGCTAAGGTAAGAGCAGCATCTCGAGACTTAGAAAAATATAGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTAGCTAGAAGCACAAGAGGAAGAAGAAGTAGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTAAATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTAGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGAAATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATAGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115009.1": [ - { - "region": "gag", - "start": 302, - "end": 1820, - "orientation": "forward", - "distance": 1.8444000000000005, - "indel_impact": 844, - "protein": "MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC", - "nucleotides": "ATGGGTGCTAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGAAGGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATAGTACATCAACCATTATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAAGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATAGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAAGAACCAAGAGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAAGGCTAAACAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAAGAACCCTTTAAAGATTATGTAGACCGATTCTATAAAACTCTAAAGGCTGAGCAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAAGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATAGTGCAAGGAGGCAATTTTAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAAGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAAAAATGTAGAAAGGAAAGACACCAAATGAAAGATTGTACTAAGAGACCAGACTAAGACGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1613, - "end": 4624, - "orientation": "forward", - "distance": 0.4463220675944335, - "indel_impact": 1728, - "protein": "AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATGATAAAAAGAATTAGAAGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACGCCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGTCAGGAATAGATAGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAGGAAAAGAAAATTACAAAAATTAGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAAAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTAAGAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGAAGTTAAATATCAGTACAATGTGCTTCCACAGAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAGGTGAGGACTCACCACACCAGACAAGAAACATCAGAAAAAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTGATAAATAGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTGAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGAGGAGAGACCAATAGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGAAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAGAAAATGTGGTGGACAAAGTATTGGCAAGCCACCTAGATTCCTGAGTAAGAATTTGTCAATACCCCTCCCTTAGTAAAACTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAAATAAGGCAGCTAATAGAAAGACTAAATTAAGAAAAGCAGAGTATGTTACGGACAGAAGAAGACAAAAGGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAGATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTGAAGGAAATGAACAAGTAGATAAATTAGTCAGTAATAGAATCAGAAGAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAGAGAGCAATGGCTAGTAATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATAGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATAGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAAGCCGCCTGTTAGTAGGCAGAGATCAAGCAGGAATTTAGTATTCCCTACAATCCTCAAAGTCAAAGAGTAGTAAAATCTATGAATAATAAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCATAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGAGATTATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4569, - "end": 5147, - "orientation": "forward", - "distance": 1.990212765957447, - "indel_impact": 166, - "protein": "MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", - "nucleotides": "ATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATAGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATAGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGAGGAAGCAAGATTGGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5087, - "end": 5377, - "orientation": "forward", - "distance": 1.2802083333333347, - "indel_impact": 88, - "protein": "MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5358, - "end": 5572, - "orientation": "forward", - "distance": 0.6712328767123288, - "indel_impact": 35, - "protein": "MTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5497, - "end": 5572, - "orientation": "forward", - "distance": 0.7037037037037037, - "indel_impact": 0, - "protein": "MAGRSRDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5589, - "end": 5834, - "orientation": "forward", - "distance": 1.0363636363636366, - "indel_impact": 0, - "protein": "MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP", - "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGCAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5752, - "end": 8352, - "orientation": "forward", - "distance": 1.8788617886178816, - "indel_impact": 1437, - "protein": "MKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD", - "nucleotides": "ATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATAAAGTACCTGTATAGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTAGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTAGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAACTTGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATGAAAGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTAATGTAGTACCAATAGATGAAGATAGTAAAAATACTACGGGCAAATATAAGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTAAACGGCAGTCTAGCAGAAGAAAAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATAGAACAGACATAATAAGAGATATAAGACAAGCGCATTATAACATTAGTAAGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGAAGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAGTACTTAGAATAGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAAGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTAGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTAGAGGAAGAGATATGAAGGACAATTAGAGAAGTAAATTATATAAATATAAAGTAGTCAAAATTAAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAAGGAAAAAAGAGCAATAAGACTTGAAGCTTTCTTCCTTAGGTTCTTAAGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACAGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAAAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTAAGGATTTAAAGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAAGAAAAAGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTAGGCAAGTTTGTAAAATTAGTTTGACATTACACAGTAGCTATAGTATATAAAAATATTCATAATAATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGAGGACAGATAAAATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7934, - "end": 8026, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 0, - "protein": "RPSSQPREDPTGPKEQKKEVERKTEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7935, - "end": 8210, - "orientation": "forward", - "distance": 0.7826086956521738, - "indel_impact": 70, - "protein": "ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8354, - "end": 8995, - "orientation": "forward", - "distance": 1.7004854368932047, - "indel_impact": 395, - "protein": "MTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR", - "nucleotides": "ATAGGTAACAAGTTGTCAAGAAGGCTCAAGGCTAGATGGCCTGCCATAAAGGAGAAAATAAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAACAGCTAAGGTAAGAGCAGCATCTCGAGACCTGAAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGAAGAAAGAAGAGGTAAGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGAGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAAGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATAGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAAGTAGAAGAGGCCAGTGTAAGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATAGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115387.1": [ - { - "region": "gag", - "start": 292, - "end": 1794, - "orientation": "forward", - "distance": 0.21157684630738527, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAGGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACACAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCGCAGGATGTAAAAAATTGGATGACAGAAACCTTATTGGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1587, - "end": 4598, - "orientation": "forward", - "distance": 0.13147410358565748, - "indel_impact": 0, - "protein": "FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAGGGAAGATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAGAAATCAATAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTGGGGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAGACAATTATGTAAACTCCTTAAGGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGGGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATGGGATACCTGGTGGACAGAATATTGGCAAGCCACCTGGATTCCCGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAAAAAGAACCTATTGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAGGAATAATTCAAGCACAACCAGATAGGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAGGGTCTACCTTGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTCCTATTTTTGGATGGAATAGATAAGGCCCAAGAGGAGCATGAGAAATATCACAATAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAGGGAGAAGCCACGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTGGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4543, - "end": 5121, - "orientation": "forward", - "distance": 0.32164948453608244, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAGGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGCAGGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5061, - "end": 5351, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5332, - "end": 5546, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5471, - "end": 5546, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5563, - "end": 5808, - "orientation": "forward", - "distance": 0.7764705882352942, - "indel_impact": 0, - "protein": "MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL", - "nucleotides": "ATGCAATCCTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAGGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5726, - "end": 8287, - "orientation": "forward", - "distance": 0.6926339285714282, - "indel_impact": 0, - "protein": "MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAGCATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGACTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAGATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGCTGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGCATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAAAGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAAGAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7869, - "end": 7961, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 32, - "protein": "RPSSQLRGEPTGPKE", - "nucleotides": "AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7870, - "end": 8145, - "orientation": "forward", - "distance": 0.2934782608695652, - "indel_impact": 0, - "protein": "DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE", - "nucleotides": "GACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8289, - "end": 8939, - "orientation": "forward", - "distance": 0.534862385321101, - "indel_impact": 0, - "protein": "MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTAGGGTTGGATGGAATGCAGTGAGGGAAAGAATGAGACGAGCTCAGCCAACAGCAGATAGGGAACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAATATGGAGCACTTACAAGTAGGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGATGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATGGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGGTTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115491.1": [ - { - "region": "gag", - "start": 521, - "end": 2020, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1813, - "end": 4824, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4769, - "end": 5347, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5287, - "end": 5577, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5558, - "end": 5772, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5697, - "end": 5772, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5789, - "end": 6034, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5952, - "end": 8492, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8074, - "end": 8166, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8075, - "end": 8350, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8494, - "end": 9108, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK116110.1": [ - { - "region": "gag", - "start": 117, - "end": 1600, - "orientation": "forward", - "distance": 1.730938123752496, - "indel_impact": 973, - "protein": "MSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ", - "nucleotides": "GCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTTGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACATCCTGTGAAGCTTGCTCGGCTCTTAGGGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTTTTATGTCCAAAATGCTGACAGGACTATACATTCTTACTATTTTATTTAATCCCAGGATTACCCATCTTTTATAGATATCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATATCACTTCCCCTTGGTTCTCTCATTTGGCCTGGTGCAACAGGCCCTGCATGCACTGGATGCAATCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAATATTTGCATTGCTGCTTGATGTCCCCCCACTGTATTTAGCATGGTGTTTATATCTTGTGGGGTGGCTCCTTCTGCTAATGCTGAAAACATAGGTATTACTTCTGGGCTAAAAGCCTTTTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGACATGGCCTGATGTACCATTTGCCCCTGGAGGTTTTGCACTATAGGGTAGTTTTGGCTGACCTGGCTGTTATTTCCTGCGCCAGCTGCTGCTTGCTGTGCTTTCATCTTGCTTTTGTTTTGCTCTTCCTCTATCTTATCTAGCGCTCCCTTGGTGTCTTGTATCTCTATCCTTTGATGTATACAATAGAGGACCGCTACTGTATTATATAATGATTTAAGCTCTTCTGACCCTGTTTGGAGGGATGGCTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTAGATGTTTTAACCTATATTTTTTCTTTCCTCCTGGCCTTAACCGAATTTTTTCCCATTGGTCTAATTTTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGGTAAAAACTTTTTTGGCGTACTCACCAGTCGCCGAAGCAATGAGCCAAGTAAATTCAACTACCGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAAGAAGACTGTTAAGTGTTTCAACTGTGGTAAAATAGGGCATATAGCAAAAAATTGCAGGGCCCCCAGGAGAAAGGGCTGTTGGAAATGTGGACAGGAAGGACACCAGATGAAAGATTGTAGTGAGAGACAGGCTAATTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAA", - "subtype_start": 140, - "subtype_end": 1642, - "subtype_aminoacids": "MGARASVLSGGQLDRWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIAVLYCVHQKIEVKDTKEALEKIEEEQNKSKKKAQQAAANTENSSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWRCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQETIDKELYPLTALKSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATAGATGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTATTGGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAAGCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTAACACAGAAAACAGCAGCCAGGTTAGCCAAAATTACCCTATAGTGCAAAATATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTGGGAGAAATTTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCCGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGTCCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTGACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCCGAAATTGCAGGGCCCCTAGGAAGAAGGGCTGTTGGAGATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1393, - "end": 4404, - "orientation": "forward", - "distance": 0.20376984126984143, - "indel_impact": 0, - "protein": "FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED", - "nucleotides": "TTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAAAAGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGTGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACCCATAGAAATCTATGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGGTGCACTTTAAATTTTCCCATTAGTCCTATCGAAACTGTACCAGTAAGATTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAGATTTCAAAGATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTGGGAATACCGCATCCCGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCATTAGATAAAGACTTTAGGAAGTATACTGCATTTACCATACCCAGTGTAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAGCTAGCATGACAAAAATTTTAGAGCCTTTTAGGAAGCAAAATCCAGACATGGTTATTTATCAATACATGGATGATCTATATATAGGATCTGACTTGGAATTAGGACAGCATAGGACAAAAATAGAGGAACTGAGACAACATCTATTGAGGTGGGGGTTTACCACACCAGACAAGAAGCATCAGAAAGAACCTCCATTCCTCTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAACACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTAGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAGTTATGTAAACTCCTTAGAGGAACCAAATCACTAACAGAAGTAGTACCACTAACAAGAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAAAAGCAGGGACAAGGCCAGTGGACTTATCAGATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATAAAGGGTACCCACACTAATGATGTAAAACAATTAACACAGGCTGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACATTCTATGTCGATGGGGCAGCCAATAGGGATACTAAATTAGGAAAAGCAGGATATGTTACTGACAGGGGAAGACAAAAAATTGTCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTACCTAGCTCTGCAGGATTCAGGATCAGAAGTAAACATAGTATCAGACTCACAGTATGCAATAGGAATTCTTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGACATGGGTGCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTATTATTCTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCCATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGTTAAAAGGAGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTGGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTGGCCAGTGGATATATTGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATATTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAACACAATACATACAGACAATGGCAGCAACTTCACTAGCACTGCGGTTAAAGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGGGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGAGGTACAGTGCAGGGGAAAGAATAGTAGACATGATAGCATCAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGACTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAA", - "subtype_start": 1435, - "subtype_end": 4446, - "subtype_aminoacids": "FFRENLAFPQGKAREFSSEQTRADSPTSRELQVWGRDNNSLSEAGDNRQGTISFNCPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTKEAELELAENREILKETVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESEIVSQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESINKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCTATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTGAAAAAGAAAAAATCAGTAACGGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCGAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGATAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAACCTATAGTGCTGCCGGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAGTTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAAACAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTAAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTCAAACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAGCCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCGGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTGACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAAAAGACTGAGTTACAAGCAATTCACCTAGCTTTGCAGGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGATAGTCAGTCAAATAATAGAGCAGTTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAGGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAGGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTTATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTGTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAA" - }, - { - "region": "vif", - "start": 4349, - "end": 4927, - "orientation": "forward", - "distance": 0.546875, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAATGCATGGAAAAGCTTAGTAAAGCACCATATGCATGTTTCAAGGAAAGTTGAGAGATGGGTTTATAAACATCACTATGAAAGTACTAATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAAAAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAAGAGCTATAATACACAAGTAGACCCTGAAGTAGCAGACCAACTAATCCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAAAGCCATAGTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCCCTACAGTACTTGGCATTAGCAGCATTAGTAAAATCAAAAAAGACAAAGCCACCTTTGCCTAGCGTTACGAAGCTGACGGAGGATAGATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAG", - "subtype_start": 4391, - "subtype_end": 4969, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIKTWKSLVKHHMYVSKKAKGWLYRHHYQSIHPRISSEVHIPLGEASLVIKTYWGLHTGEREWHLGQGVSIEWRKGRYNTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCEYQAGHNKVGSLQYLALTALRTPKKIKPPLPSVRKLTEDRWNKPQKTKGHRESHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAAAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAGCTAAGGGATGGTTGTATAGACATCACTATCAAAGCATTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGAGGCTAGCTTGGTAATAAAGACATATTGGGGTCTGCATACAGGAGAAAGAGAATGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAGGAAGATATAACACACAAGTAGACCCAGGCCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAAGAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4867, - "end": 5157, - "orientation": "forward", - "distance": 0.514285714285714, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP", - "nucleotides": "ATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGGATATGGCTTCAGAGCTTAGGACAATACGTCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTTTGCAACAAATGCTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACGAGGGAGAACAAGAAATGGAGCCAGTAGACCCTAG", - "subtype_start": 4909, - "subtype_end": 5199, - "subtype_aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRTWLHGLGQYIYENYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGITLQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGACATGGCTCCACGGATTAGGGCAATATATCTATGAAAATTATGGGGACACTTGGGCAGGAGTGGAGGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGGATTGGGTGTCGACATAGCAGAATAGGCATTACTCTACAAAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5138, - "end": 5352, - "orientation": "forward", - "distance": 0.5890410958904109, - "indel_impact": 0, - "protein": "MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKX", - "nucleotides": "ATGGAGCCAGTAGACCCTAGCCTAGCGCCCTGGAAGCACCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGCTATTGTAAAAAGTGCTGCTTACATTGCCAAGTTTGTTTCACAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCA", - "subtype_start": 5180, - "subtype_end": 5394, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACNTCYCKKCCFHCQVCFTKKALGISYGRKKRRQRRRAPQDRQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACGGCTTGTAACACTTGCTATTGTAAAAAATGTTGCTTTCATTGCCAAGTTTGTTTCACAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5277, - "end": 5352, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCA", - "subtype_start": 5319, - "subtype_end": 5394, - "subtype_aminoacids": "MAGRSGDSDEELLRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5369, - "end": 5614, - "orientation": "forward", - "distance": 0.536144578313253, - "indel_impact": 0, - "protein": "MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL", - "nucleotides": "ATGCAATCTTTGCAAATAGGAGCAATAGTAGCATTAGTAGTAGGAACAATAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAAGAAAATAGATAGAATAATAGATAGAATAGTAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAG", - "subtype_start": 5411, - "subtype_end": 5656, - "subtype_aminoacids": "MHSLQILGIVALVVAGIIAIVVWSIVIIEYRKILRQRKIDRLIDRIIERAEDSGNESEGDQEELSALVEMGHLAPWDIND**", - "subtype_nucleotides": "ATGCACTCTTTACAAATATTAGGAATAGTAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAG" - }, - { - "region": "env", - "start": 5532, - "end": 8072, - "orientation": "forward", - "distance": 0.5598388952819338, - "indel_impact": 0, - "protein": "MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAGTGCTGCAACAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACGCATGCCTGTGTACCCACGGACCCCAACCCACAAGAAGTATTATTGGGAAATGTGACAGAAGATTTTAATGCATGGAAAAATAACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTTTGTGTTATTTTGCATTGCACTGATGTCAACAATACTAGAAATGGGATGACAGGAGAACTAAAAAACTGCTCTTTCAATATCACCACAAAAATAACAAATAAGGTACAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTACCAATAAATAATAAGGATAATGATACTAGCTTTAATAATAATAGCTATAGGTTGATAAGTTGTAACACCTCAGTTATTACACAGGCTTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTACTGTACCCCGGCTGGTTATGCAATTCTAAGGTGTAACAATGAGACATTCAGTGGAAAAGGGCCATGTACAAATGTCAGCTCAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGCAGTCTAGCAAAACAGGAGGTAGTAATTAGATCTCAAAATTTCTCGGACAATGTTAAAACCATAATAGTACAGCTGAAGACCCCTGTAAAAATTAACTGTACAAGGCCCAATAACAATACAAGAAAAAGTATACATGCAGGACCAGGGAAAGTAATTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGCAACATTAGTGCAGCAGAGTGGAATGATACTTTAGGACAGATAGTTACAAAATTACAAGAACAATTTGGGAATAAAACAATAGTCTTCAATCAATCGTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTTTACTGTAATTCAACACAACTGTTTAATAGTACTTGGAATAATAATGGTACTAATACTTGGAATAGTACAGGTAATATCACACTCCCATGTAAAATAAGGCAAATTGTAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCTCCTCCCATCCGTGGACAAATTAAATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAACGAGAGTGAGAGCGAAACCTTCAGACCTGGCGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGACTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAACACTGGGAGCTGTGTTCCTTGGGTTCTTGGGAACAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGCAATAAATCTCTGAATGAAATTTGGGATAACATGACCTGGATGGAGTGGGAAAAAGAAATTAGTAATTACACACAATTAATATACACTTTAATTGAAGAATCGCAGAGCCAGCAAGAAAAGAATGAACAAGAATTATTGGCACTAGATAAGTGGGACAGCTTGTGGAGTTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAATAGGGTTAAGAATAGTTTTTACTGTACTTTCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTGTCATTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTAGTACAAAGAGCTTGTAGAGCTATTCTCCACATACCTGTAAGACTAAGACAAGGCTTAGAAAGAGCTTTGCTATAA", - "subtype_start": 5574, - "subtype_end": 8123, - "subtype_aminoacids": "MRVKEIRKNCQHLWRWGILLLGILMISSAAENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWTNNMAEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLRNTTNTNSTAEEMEAKGEMKNCSFNITTSIRNKLQKEYALFYKLDIVPINNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFSGNGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTDNAKTIIVQLKEPVEINCTRPNNYTRKRITMGPGRVYYTTGEIIGDIRRAHCNISSTKWNNTLGQIVKKLKEQFNNNTIVFKKSSGGDPEIVMHSFICGGEFFFCNSTKLFNSTWNSTEGNDDGEERNITLPCRIKQIVNMWQEVGKAMYAPPIGGQIRCTSNITGLLLTRDGGNQNGTNETEIFRPGGGNMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLDEIWNNMTWMQWEREINNYTGLIYTLIEESQNQQEKNELDLLQLDKWASLWNWFDITNWLWYIKIFIMIVGGLVGLRIIFTVLSIVNRVRQGYSPLSFQTHLPAPRGPDRPGGIEEEGGERDRDTSGRLVDGFLAIFWVDLRNLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVLQRVYRAILNIPTRIRQGLERALL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAGTGCTGCAGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAAYATGTGGACAAATAACATGGCAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACTCCACTCTGTGTTACTTTAAATTGCACTGATTTGAGAAATACTACTAATACCAATAGTACCGCCGAGGAAATGGAGGCGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCACCACAAGCATAAGGAATAAGTTGCAGAAAGAATATGCACTCTTTTATAAACTTGATATAGTACCAATAAATAATGATAATACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACCCAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAGTTCAGTGGAAACGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGCTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTTACAGACAATGCTAAAACCATAATAGTACAGCTGAAAGAACCTGTAGAAATTAATTGTACAAGACCTAACAACTATACAAGGAAAAGAATAACTATGGGACCAGGGAGAGTATATTATACAACAGGAGAAATAATAGGAGATATAAGACGAGCACATTGTAACATTAGTAGCACAAAATGGAATAACACTTTAGGACAGATAGTTAAAAAATTAAAAGAACAATTTAACAATAATACAATAGTCTTTAAGAAATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTATTTGTGGAGGGGAATTTTTCTTCTGTAATTCAACAAAACTGTTTAATAGTACTTGGAATAGCACTGAAGGAAATGACGATGGAGAGGAAAGAAATATCACACTCCCATGCAGAATAAAACAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCGGAGGACAAATTAGATGCACCTCAAATATTACAGGGCTGCTATTAACAAGAGATGGAGGTAACCAAAATGGGACCAACGAGACTGAAATCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAACTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCTTAGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGATGAGATTTGGAATAACATGACCTGGATGCAATGGGAAAGAGAAATTAACAATTACACAGGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGATTTACTGCAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGATTGGTAGGTTTAAGAATAATTTTTACTGTACTTTCTATAGTGAATAGGGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTATTACAAAGAGTTTATAGAGCTATTCTCAACATACCTACAAGAATCAGACAGGGCTTGGAAAGGGCTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7654, - "end": 7746, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 0, - "protein": "RPASQPRGDPTGPKESKKTVERETETDPHA", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAG", - "subtype_start": 7705, - "subtype_end": 7797, - "subtype_aminoacids": "RPTSQPRGDPTGPEESKKKVERETETHPDA*", - "subtype_nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAG" - }, - { - "region": "rev_exon2", - "start": 7655, - "end": 7930, - "orientation": "forward", - "distance": 0.48913043478260865, - "indel_impact": 0, - "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC", - "nucleotides": "GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAG", - "subtype_start": 7706, - "subtype_end": 7981, - "subtype_aminoacids": "DPPPSPEGTRQARRNRRRRWRERQRHIRTLSGWILSNFLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8074, - "end": 8694, - "orientation": "forward", - "distance": 0.48142857142857154, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTGTAAGGGAAAGAATAAGAAGAGCTGGGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGACAAACATGGAGCAATCACAAGTAACAATACACCAGCTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTTAGGCCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATGATATACTCCCAGCAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGACCAGGGGTCAGGTTTCCACTGACCTTTGGATGGTGCTTCAAACTAGTACCACTTGAGACAGAGCAGGTAGAAGCGGCCACTGGAGGAGAGAACAACTGCTTGTTACACCCTTTGAACCAGCATGGGATGGATGACCCGGAGAGAGAAGTACTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAGAGCCAAAGAGCTGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8125, - "subtype_end": 8751, - "subtype_aminoacids": "MGSKWSKMSGWPAVRERMRRTKPAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEGEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEKEVLMWKFDSRLALHHMAREKHPEYYKDC*", - "subtype_nucleotides": "ATGGGTAGCAAGTGGTCAAAAATGAGTGGGTGGCCTGCTGTAAGGGAAAGAATGAGAAGAACTAAGCCAGCTGAGCCAGCAGCAGATGGAGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGGGGAGGTGGGTTTCCCAGTCAAACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACCATACACAAGGCTACTTCCCTGATTGGCAGAATTACACACCAGGGCCAGGGGTCAGATTCCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGACAAGGTAGAAGAGGCCAATGAAGGGGAAAACAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAAGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTGGCATTGCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAGGACTGCTGA" - } - ], - "MK115527.1": [ - { - "region": "gag", - "start": 683, - "end": 2182, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1975, - "end": 4986, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4931, - "end": 5509, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5449, - "end": 5739, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5720, - "end": 5934, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5859, - "end": 5934, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5951, - "end": 6196, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6114, - "end": 8654, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8236, - "end": 8328, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8237, - "end": 8512, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8656, - "end": 9270, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK114997.1": [ - { - "region": "gag", - "start": 210, - "end": 1718, - "orientation": "forward", - "distance": 0.24035785288270395, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACACAATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGCACTTTAAATGCATGGGTAAAAGTGATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGATCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCCTCCATAATGGCGCAAGGAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1511, - "end": 4522, - "orientation": "forward", - "distance": 0.22266401590457252, - "indel_impact": 0, - "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAGAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGACAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACCAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTCCATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTAACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCCGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4467, - "end": 5045, - "orientation": "forward", - "distance": 0.3969072164948453, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTAGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCCTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4985, - "end": 5275, - "orientation": "forward", - "distance": 0.28125, - "indel_impact": 0, - "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5256, - "end": 5470, - "orientation": "forward", - "distance": 0.547945205479452, - "indel_impact": 0, - "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5395, - "end": 5470, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5487, - "end": 5732, - "orientation": "forward", - "distance": 0.7790697674418607, - "indel_impact": 0, - "protein": "MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", - "nucleotides": "ATGCATGCCTTAAAAATAGCAGCAATAGTAGGATTAGTAGTAGCAACAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5650, - "end": 8206, - "orientation": "forward", - "distance": 1.2346375143843504, - "indel_impact": 1167, - "protein": "MHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGAAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAGTTTAAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAGACTTGATGTAGTATCAATAGATGAAGATAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAACCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGAGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAAAAAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAACACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCTCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACATTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAGTGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAGGCAGTGGGAAAAGGAAATTGACAATTACACAGACACAATATATAACTTAATTGAACTATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAGTTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTACTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7788, - "end": 7880, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7789, - "end": 8064, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8208, - "end": 8849, - "orientation": "forward", - "distance": 0.7375565610859729, - "indel_impact": 0, - "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC", - "nucleotides": "ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTTCAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTTCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGTCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAATATTTCAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115518.1": [ - { - "region": "gag", - "start": 739, - "end": 2238, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2031, - "end": 5042, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4987, - "end": 5565, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5505, - "end": 5795, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5776, - "end": 5990, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5915, - "end": 5990, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6007, - "end": 6252, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6170, - "end": 8710, - "orientation": "forward", - "distance": 0.5247139588100684, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8292, - "end": 8384, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8293, - "end": 8568, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8712, - "end": 9326, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115065.1": [ - { - "region": "gag", - "start": 221, - "end": 1729, - "orientation": "forward", - "distance": 0.2507968127490041, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCCAAGGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGCTGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1522, - "end": 4533, - "orientation": "forward", - "distance": 0.20775347912524844, - "indel_impact": 0, - "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGATTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTATCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAAATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCTATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAGAAAAGTACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4478, - "end": 5056, - "orientation": "forward", - "distance": 0.3969072164948453, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4996, - "end": 5286, - "orientation": "forward", - "distance": 0.3125, - "indel_impact": 0, - "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5267, - "end": 5481, - "orientation": "forward", - "distance": 0.5945945945945945, - "indel_impact": 0, - "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5406, - "end": 5481, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5498, - "end": 5743, - "orientation": "forward", - "distance": 0.7790697674418607, - "indel_impact": 0, - "protein": "MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", - "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5661, - "end": 8261, - "orientation": "forward", - "distance": 0.6331111111111118, - "indel_impact": 0, - "protein": "MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAGTACTTGGAATGGTACTGACAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATAGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACCTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7843, - "end": 7935, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7844, - "end": 8119, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8263, - "end": 8904, - "orientation": "forward", - "distance": 0.7104072398190044, - "indel_impact": 0, - "protein": "MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", - "nucleotides": "ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTACCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAGAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115464.1": [ - { - "region": "gag", - "start": 794, - "end": 2296, - "orientation": "forward", - "distance": 1.495, - "indel_impact": 637, - "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", - "nucleotides": "ATAAGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAAGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACAAAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATAGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATAAGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATAGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCACAGGATGTAAAAAATTAGATGACAGAAACCTTATTAGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTAAGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAAGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2089, - "end": 5100, - "orientation": "forward", - "distance": 0.3214711729622268, - "indel_impact": 1716, - "protein": "AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATAAGTTTGCCAGGAAGATAGAAACCAAAAATGATAGAAGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATAACTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTAGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAACCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAAAAAAGATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAGAAAATTAGTAGATTTCAAGGAACTTAATAAAAGAACTCAAGACTTCTAAGAAGTTCAATTAAGAATACCACACCCCGCAAGGTTAAAAAAGAAGAAATCAATAACAGTACTAGATGTAGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAAGAAGTATACTGCATTTACCATACCTAGTATAAATAATAAGACACCAGAGATTAGATATCAGTACAATGTGCTTCCACAGGGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATAGATGACTTGTATGTAAGATCTGACTTAGAAATAAGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTAGAGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATAAGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTAAGAAAATTGAATTGAGCAAGTCAGATTTATGCAGAGATTAAAGTGAGACAATTATGTAAACTCCTTAAAGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAAGAAATTCTAAAAGAACCAGTACATGAAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGAGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGAGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGAGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATAAGATACCTAGTGGACAGAATATTGGCAAGCCACCTAGATTCCCGAGTAAGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATAGTACCAATTAGAAAAAGAGCCTATTGTAGGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAAAGAGACTAAATTAAGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAAGAATAATTCAAGCACAACCAGATAAGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTTGCATAGGTACCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTGCTAGAATCAGGAAAGTCCTATTTTTAGATAGAATAGATAAGGCCCAAGAAGAGCATAAGAAATATCACAATAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAAAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAAAAAGAAGCCACGCATAGACAAGTAGACTGTAGTCCAAGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTAGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTAGTAGGCAAAGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAAGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5045, - "end": 5623, - "orientation": "forward", - "distance": 1.494270833333335, - "indel_impact": 248, - "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAAGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTAGGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGTCAAGGAGTCTCCATAGAATGGAAGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5563, - "end": 5853, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5834, - "end": 6048, - "orientation": "forward", - "distance": 0.45833333333333326, - "indel_impact": 129, - "protein": "MRILGQE", - "nucleotides": "ATAGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5973, - "end": 6048, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6065, - "end": 6310, - "orientation": "forward", - "distance": 1.8817073170731702, - "indel_impact": 124, - "protein": "MQSLYILTIVALVVAAILAIVV", - "nucleotides": "ATGCAATCTTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTAGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAAGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6228, - "end": 8798, - "orientation": "forward", - "distance": 1.8316091954022926, - "indel_impact": 1449, - "protein": "MTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST", - "nucleotides": "ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATAAGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGAAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTTCTAATACTACTTAGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATAAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAAAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAAGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATAGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAAGAAGCCATACGAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAAGACCAGGAAGAGCATTTTATACAACAGGAGATATAATAAGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAGTAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAAAGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAAAGAAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTAGACTAAAAATGGTACTGATAGTTGGCAGTCTAATGATACTCAGAATAGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCCATCAGTAGACAAATTAACTGTACATCAAATATTACAGGGCTAGTTTTAACAAGAGATAGGAGGAATGAAACTAAGACCTTTAGACCTGGAAGAGAAAATATGAAGGATAATTGGAGAAGTAAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAAGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAGAAAAGAGCAGTAAGACTAGGAGCTATGTTCCTTAAGTTCTTAGGAGCAGCCAGAAGCACTATAGGCGCAGCGTCGATAGCGCTGACGGAACAGGCCAGACGAGTCTTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTAAGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATCGTAGTTGAGGTAGGCATAACAAAAATTACAAAAGTCTAGATGACATTTAGGATAACATGACCTAGATAGAGTAGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAAAAAAGAATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8380, - "end": 8472, - "orientation": "forward", - "distance": 0.7741935483870968, - "indel_impact": 37, - "protein": "RPSSQPREEPTGPKE", - "nucleotides": "AGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8381, - "end": 8656, - "orientation": "forward", - "distance": 0.5217391304347827, - "indel_impact": 45, - "protein": "RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE", - "nucleotides": "GACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8800, - "end": 9450, - "orientation": "forward", - "distance": 1.1589371980676328, - "indel_impact": 178, - "protein": "MTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", - "nucleotides": "ATAGGTGGCAAGTGGTCAAAAAGTAGTAAGGTTAAATAGAATGCAGTGAAAGAAAGAATAAGACGAGCTCAGCCAACAGCAGATAAAGAACGAGCTGAGCCAGCAGCAGATAAGGTAAGAGCAGCATCTAGAGACCTAGAAAAATATGGAGCACTTACAAGTAAGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGATGAGGTAGGTTTTCCAGTCAGACCTCAGTTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAAGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATAGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGATTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115530.1": [ - { - "region": "gag", - "start": 746, - "end": 2245, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2038, - "end": 5049, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4994, - "end": 5572, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5512, - "end": 5802, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5783, - "end": 5997, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5922, - "end": 5997, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6014, - "end": 6259, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6177, - "end": 8717, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8299, - "end": 8391, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8300, - "end": 8575, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8719, - "end": 9333, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115520.1": [ - { - "region": "gag", - "start": 695, - "end": 2194, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1987, - "end": 5003, - "orientation": "forward", - "distance": 1.448607975921763, - "indel_impact": 1225, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDMGNGQYSL", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATATGGGAAATGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4948, - "end": 5526, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5466, - "end": 5756, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5737, - "end": 5951, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5876, - "end": 5951, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5968, - "end": 6213, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6131, - "end": 8671, - "orientation": "forward", - "distance": 0.5251716247139588, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8253, - "end": 8345, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8254, - "end": 8529, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8673, - "end": 9287, - "orientation": "forward", - "distance": 0.5478260869565217, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115503.1": [ - { - "region": "gag", - "start": 817, - "end": 2316, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2109, - "end": 5120, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5065, - "end": 5643, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5583, - "end": 5873, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5854, - "end": 6068, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5993, - "end": 6068, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6085, - "end": 6330, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6248, - "end": 8788, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8370, - "end": 8462, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8371, - "end": 8646, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8790, - "end": 9404, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115570.1": [ - { - "region": "gag", - "start": 687, - "end": 2186, - "orientation": "forward", - "distance": 0.2616302186878725, - "indel_impact": 0, - "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCGACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1979, - "end": 4990, - "orientation": "forward", - "distance": 0.14811133200795235, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4935, - "end": 5513, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5453, - "end": 5743, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5724, - "end": 5938, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5863, - "end": 5938, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5955, - "end": 6200, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6118, - "end": 8658, - "orientation": "forward", - "distance": 0.5325714285714285, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCTCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAGGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8240, - "end": 8332, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8241, - "end": 8516, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8660, - "end": 9274, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115509.1": [ - { - "region": "gag", - "start": 555, - "end": 2054, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1847, - "end": 4858, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4803, - "end": 5381, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5321, - "end": 5611, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5592, - "end": 5806, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5731, - "end": 5806, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5823, - "end": 6068, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5986, - "end": 8526, - "orientation": "forward", - "distance": 0.5241695303550973, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8108, - "end": 8200, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8109, - "end": 8384, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8528, - "end": 9142, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115702.1": [ - { - "region": "gag", - "start": 246, - "end": 1781, - "orientation": "forward", - "distance": 0.342940038684721, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGCGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAGGAAACGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGCTTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAAACAAATAATGGGACAACTCCAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAGAGGATAGATGTAAAGGATACCAAAGAAGCTTTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCCGCTGACACAGGAAACAACAGCCAAGTCAGCCAAAATTACCCCATAGTGCAGAACATGCAGGGACAAATGGTACATCAGGCCATATCACCCAGAACCCTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCATTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAGGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCAGGCAGGACCTGTTGCACCAGGCCAGATAAGGGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACACATAATCCACCCGTCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCTGAGCAAGCTTCACAGGAAGTAAAAGGTTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCAGCCATAAGGCAAGGGTTTTGGCAGAAGCAATGAGCCAAGCAACAGGTGCACATGCCATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1544, - "end": 4585, - "orientation": "forward", - "distance": 0.20128078817733996, - "indel_impact": 0, - "protein": "FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAAAGATAGGGGGGCAAATAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGAAATGGCGTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATAGCCATAGAAATTTGTGGACATAAAGCAATTGGTACAGTATTAGTAGGACCTACACCTGTCAATATAATTGGAAGAAATCTATTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAGTTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATAGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGATTAAAAAAGAAAAAATCAATAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATATTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGACGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGAGGACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAGGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAGATGGACAGTACAGCCTATAAAGCTGCCAGAGAAAGAAATCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAAACAATTATGTAAACTCCTTAGGGGAACCAAAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGAATTAATAGCAGAAATACAGAAGCAAGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCGAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAGAAAATATCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAATATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAGTACTCCTCCCCTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCATCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATATTACTGACAGAGGAAGACAAAAGGTTGTCACCCTAAATGACACAACCAATCAAAAGACAGAGTTACAAGCAATTCTTCTAGCATTGCAGGATTCAGGATTAGAAGCAAACATAGTGACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGATCTACCTGACATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAGCAAGTAGATAAATTAGTCAGTACTGGGATTAGGAAAGTATTATTTTTAGATGGAATAGATAGGGCCCAAGAAGAGCATGAGAGATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTCAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAATAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACCACAGTTAAGGCCGCCTGTTGGTGGGCGGGGGTCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTGGTAGAATCTATGAATAAAGAATTAAAGAAAATAATAGGACAGGTCAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCAAAAATTCAAAACTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCGGTAGTAATACAGGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4530, - "end": 5108, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAACTAAGGGATGGTTTTATAGACATCACTATGAGAGCACTCATCCAAAAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTGAGTTGGTAGTAACAACATATTGGGGTTTGCAGCCAGGGGAAAGGGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAGGGAGATATAGAACACACGTGGACCCTAACCTAGCAGACCAACTAATTCATCTGCATTACTTTGATTGTTTTTCAGAATCTGCTATAAGACATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAAGAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACTAAACTAACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5048, - "end": 5338, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAACTTAAGCAGGAAGCTGTTAGGCATTTTCCTAGGCCATGGCTTCATAGCTTAGGGCAATATATCTATGAAACTTATGGGGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATGCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATCCCACAGAGGAGAGCAAGAAATGGATCCAGTAGATCCTAA", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5319, - "end": 5533, - "orientation": "forward", - "distance": 0.6818181818181821, - "indel_impact": 0, - "protein": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKX", - "nucleotides": "ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5458, - "end": 5533, - "orientation": "forward", - "distance": 0.6923076923076923, - "indel_impact": 0, - "protein": "MAGRSGDGDEDLLKAVRLIKTLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5550, - "end": 5807, - "orientation": "forward", - "distance": 0.945263157894737, - "indel_impact": 0, - "protein": "MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND", - "nucleotides": "ATGTTATCTTTAGAAGTAATAGTAGCAATAACAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGACCATAGTACTTATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAAGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5716, - "end": 8273, - "orientation": "forward", - "distance": 0.6706208425720628, - "indel_impact": 39, - "protein": "MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE", - "nucleotides": "ATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAATGCTACAGAACAATTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACAACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTATTAATAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAAATGCAAGAGGACATAGTCAGCTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACCTTAAATTGCACTAATTTGACCATTGAGCCAAACAATGCTACTAAAGCCAATATTAGTGGGAGGTTAGAGGGGAAAGGAGAAATGACAAACTGCTCTTTCAATGTCACCACAAGCCTAAGAGATAAGAGGAAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTAGCAACAGGTGAAAATAATAACAGCTTTAGGTTGATAAGTTGTAATACCTCAGAGATTACACAGGCCTGTCCAAAGGTATCATTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAAAAGTTCAATGGAACAGGAAAATGTAACAATGTCAGCATAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAGTTAGATCTGCCAATTTCTCAGACAATACTAAGACCATAATAGTACAGCTGAACAAAACTGTAGTAATTAATTGTACAAGACCCAACAACAATACAAGGAGAAGTATACATATAGCACCAGGGAGAGCATTTTATGCAACAGGAGATATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAAAGAAGATTGGAATACCACTTTAAACCAGGTGGCTAAAAAATTACAAGAACAATTTGAGAATGCAACAATAGACTTTAAACCATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACGGAACTATTTTCTTGGAATGCTACAACAAAACTGTTTACTTGGAATGCTACAAATAGCAATAATGGAACCATCATACTCCCATGTAGAATAAAACAAATTATAAACATGTGGCAAGAGGTAGGAAAAGCAATGTATGCCCCTCCCATTCGTGGACAAATTAGATGTTCGTCAAATATTACAGGACTGCTATTAACAAGAGATGGTGGGACTAACGGGACAGGGAACAGGAATGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAGAAATTAAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGACCATAGGAGCTATGTTCCTTGGGTTCCTGGGGGCAGCAGGAAGCACTATGGGCGCAGCATCACTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCGATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATAAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGGTACCTAAGAGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATATTAGTTGGAGTAATAGAACTCTGAATAACATTTGGGACAATTTGACTTGGATGCAGTGGGATACAGAAATTAACAATTACACAAACAAAATATACCAATTACTTGAGGAAGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAATTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTACATAAAAATATTCATATTAATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTAAATGCCACAGCCATAGTAGTAGCTGAGGGGACAGATAGGATTATAGAATTAGCACAAAGAATTTGTAGAGCAGAATAAGACAGGGCTTGAAAAGGCTTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7874, - "end": 7966, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPASQSRGDPTGPKEPKKKVERETETDPTD", - "nucleotides": "AGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7875, - "end": 8150, - "orientation": "forward", - "distance": 0.326086956521739, - "indel_impact": 0, - "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE", - "nucleotides": "GACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8275, - "end": 8895, - "orientation": "forward", - "distance": 0.5454976303317536, - "indel_impact": 0, - "protein": "MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAACATAGTAAGAGTGAATGGGCTGATGTAAGGGAAAGAATGGCACAAACTGAGGCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGATCTGGAAAGACATGGAGCAATCACAAGTAGCAATACAGCAACTAACAATGCTGCTTGTGCTTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAAACCTCAGGTGCCTTTGAGACCAATGACCTACAAGGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATTCCCAAAAAAGACAAGACATCCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGAACCAGATTCCCACTGTGCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTGGAAGAAGCCAATAAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACACCGAGAGAGAGGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACGTAGCCAGAGAGAAACATCCGGAGTACTTCAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115095.1": [ - { - "region": "gag", - "start": 188, - "end": 1696, - "orientation": "forward", - "distance": 1.8384000000000005, - "indel_impact": 806, - "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC", - "nucleotides": "ATAGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGGGAGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAAAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAACCCATATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAAGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAAGCTGCAGAATAAGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAGGAACCAAGAAGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATAGATAATCCTGAGGTTAAATAAAATAGTAAAAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAGGAACCCTTTAAGGATTATGTAGACCGGTTCTATAAAACTCTAAAGGCTGAACAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAAGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATGGTGCAAGGAGGCAATTTAAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAGAAATGTGGAAAGAAAGGACACCAAATGAAAGATTGTACTAAGAGACAGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1489, - "end": 4500, - "orientation": "forward", - "distance": 0.45376984126984143, - "indel_impact": 1865, - "protein": "KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK", - "nucleotides": "TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAAGAAGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAAAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATAATAGAAGGAATTAGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAAAAATTTGTAGACATAAAGCTATAAGTACAGTATTAATAAGACCTACACCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAAGAAAGGAAAATTACAAAAATTAGGCCTAAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATAGAGAAAATTAGTAGATTTCCGAGAACTTAATAAAAGAACACAAGACTTTTAAAAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAAGAGTTAGATATCAGTACAATGTGCTTCCACAAAGATAGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAAGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAAGTAAAGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTAAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAAGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAGGAGATTCTAAAAGAACCAGTACATAGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGAGGAGACCAATGGACATATCAAATTTATCAGAAGCCATTTAAAAATCTGAAAACAAAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATAGAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAAGAAATGTAGTAGACAGAGTATTGGCAAGCCACCTAGATTCCTGAGTAGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAGAGAGACTAAATTAAGAAAAGCAAGGTATGTTACGGACAGAAGAAGACAAAAAGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAAATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTAGTAGAATCAGAAAAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAAGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTAGTAGGCAAGGATCAAGCAGAAATTTAGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAAGGGGGATTGGGGGGTACAGTGCAGAGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTAGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4445, - "end": 5023, - "orientation": "forward", - "distance": 1.8546875000000016, - "indel_impact": 332, - "protein": "MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGAGTTTACAGACATCACCATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAAGGGAAGCAAGATTAGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGGCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4963, - "end": 5253, - "orientation": "forward", - "distance": 0.5, - "indel_impact": 0, - "protein": "MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAA", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5234, - "end": 5448, - "orientation": "forward", - "distance": 0.6756756756756757, - "indel_impact": 35, - "protein": "MTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEX", - "nucleotides": "ATAGAGCCAGTAGATCCTAACTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5373, - "end": 5448, - "orientation": "forward", - "distance": 0.5925925925925926, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5465, - "end": 5710, - "orientation": "forward", - "distance": 1.8817073170731702, - "indel_impact": 122, - "protein": "MHALEIAAIVRLVVAAIIAIVV", - "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGTAAGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5628, - "end": 8228, - "orientation": "forward", - "distance": 1.8735260115606882, - "indel_impact": 1426, - "protein": "MTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD", - "nucleotides": "ATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATGAGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAAGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATGAATTAAATCTAAATTGCCCTAACAATAATACTTGTAGTAATAATACTAAATATAATATGACGGAAGAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAAGTAGTGGAAATACTACAGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAAGGAGGGCATTTTATAGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTAGGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAAAGGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAAGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAATACTTAGAATGGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATAGTAAGAGCAACAATAGTAGTAATGATACAAAGACCTTCAGGCCTAGAAGAGGAGATATGAAGGACAATTAGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAGAAAGAAAAAAGAGCAATAGGACTTAGAGCTTTCTTCCTTAAGTTCTTAGGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTGAAGATTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTAATGTGCCCTAAAATGTTAGTTAGAGCCCTAGATAGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAGGAAAAAGAAATTGACAATTATACAGACACAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTAGCATTAGATAAGTAGGCAAGTTTGTAGAATTAGTTTGACATTACACAGTGGCTATAGTATATAAAAATATTCATAATGATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGAGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAAAGGACAGATAAGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAAGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7810, - "end": 7902, - "orientation": "forward", - "distance": 0.6774193548387097, - "indel_impact": 0, - "protein": "RPSSQPRGDPTGPKEQKKEVERKTEAHPRD", - "nucleotides": "AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7811, - "end": 8086, - "orientation": "forward", - "distance": 0.6195652173913044, - "indel_impact": 69, - "protein": "ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE", - "nucleotides": "GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8230, - "end": 8871, - "orientation": "forward", - "distance": 0.8680555555555556, - "indel_impact": 40, - "protein": "MRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", - "nucleotides": "ATAGGTAACAAGTTGTCAAGAAGGCTCAGGGCTAGATGGCCTGCCATAAAAGAAAGAATGAGACAAGCTAGGCCAGTAAGAAAGCCAGAGCCAGCAGCAACTAAGGTAAGAGCAGCATCTCGAGACCTAGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGGAAGAAAAAGAGGTAGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAAGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAAGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATAGAAGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115490.1": [ - { - "region": "gag", - "start": 549, - "end": 2048, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1841, - "end": 4852, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4797, - "end": 5375, - "orientation": "forward", - "distance": 0.37823834196891193, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5315, - "end": 5605, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5586, - "end": 5800, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5725, - "end": 5800, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5817, - "end": 6062, - "orientation": "forward", - "distance": 0.6931034482758622, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5980, - "end": 8520, - "orientation": "forward", - "distance": 0.528604118993135, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8102, - "end": 8194, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8103, - "end": 8378, - "orientation": "forward", - "distance": 0.423913043478261, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8522, - "end": 9136, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MK115576.1": [ - { - "region": "gag", - "start": 468, - "end": 1967, - "orientation": "forward", - "distance": 0.2675944333996021, - "indel_impact": 0, - "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", - "nucleotides": "ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1760, - "end": 4771, - "orientation": "forward", - "distance": 0.15109343936381703, - "indel_impact": 0, - "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4716, - "end": 5294, - "orientation": "forward", - "distance": 0.36269430051813467, - "indel_impact": 0, - "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", - "nucleotides": "ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5234, - "end": 5524, - "orientation": "forward", - "distance": 0.2886597938144331, - "indel_impact": 0, - "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5505, - "end": 5719, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5644, - "end": 5719, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5736, - "end": 5981, - "orientation": "forward", - "distance": 0.727586206896552, - "indel_impact": 0, - "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5899, - "end": 8439, - "orientation": "forward", - "distance": 0.5247139588100684, - "indel_impact": 0, - "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8021, - "end": 8113, - "orientation": "forward", - "distance": 0.29032258064516125, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8022, - "end": 8297, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8441, - "end": 9055, - "orientation": "forward", - "distance": 0.5333333333333332, - "indel_impact": 0, - "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092466": [ - { - "region": "gag", - "start": 825, - "end": 2360, - "orientation": "forward", - "distance": 0.21325536062378148, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTAGCCTGTTAGAAACAGCAGAAGGCTGTAGACAAATATTGGGACAGTTACAACCGTCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACATTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCCTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGCAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGCCAGGTTAGACACACAGGAAACAGCAGCCAGGTCAGCCAAAATTACCCTATAGTACAGAACCTTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGACTACATCCAGTGCATGCAGGGCCCATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTACACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGGGGACCCGGACATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCACGTAACAAATTCAAGTGCCATAATGATGCAGAGGGGCAATTTTAGAAACCAAAGAAAGGCTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2147, - "end": 5164, - "orientation": "forward", - "distance": 0.14711729622266412, - "indel_impact": 0, - "protein": "FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAATCTTAGAGCCTTATAGAACACGAAATCCAGAAATGGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGATTTACTACCCCAGACAAAAAACATCAAAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAAACAAAGACAGCTGGACTGTCAATGACATACAGAAACTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCGCTAACAGAAGAAGCAGAGTTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGGGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGGAAGGACAATGGACATTTCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGCGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAAATTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATACGCATTGGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTGATAAAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGAAAGTATTATTTTTAGATGGAATAGAGAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGCTAAAAGGAGAAGCCATACATGGACAGGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATCCAAACCAAAGAACTACAAAAACAAATTATAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTTATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5109, - "end": 5687, - "orientation": "forward", - "distance": 0.203125, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAGGAAAGCTAAGGGATGGTTTTATAGACATCACTTTGAAAGCAATCATCCAAAAATAAGTTCAGAAGTACACATCCCACTGGAGGATGCTAGACTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGGGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTACTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACACATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGAGAAAGCCACCCTTGCCTAGTGTTAAGAAGCTAACAGAAGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5627, - "end": 5917, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAGCTTAGGGCAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATTCTGCAACAACTGTTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5898, - "end": 6112, - "orientation": "forward", - "distance": 0.41666666666666674, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 6037, - "end": 6112, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDDELLKTVRLIKVLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6129, - "end": 6374, - "orientation": "forward", - "distance": 0.6395348837209305, - "indel_impact": 0, - "protein": "MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL", - "nucleotides": "ATGCAACCTTTAACAATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATACTAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAA", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6292, - "end": 8874, - "orientation": "forward", - "distance": 0.5852808988764047, - "indel_impact": 0, - "protein": "MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAATGCTACAGAACAACAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGGTATTAAAAAATGTGACAGAAAATTTTAATATGTGGGAAAATAACATGGTAGAACAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAGTTAACTCCACTCTGTGTTACTCTAAATTGCACTAATACCACTAGGAGTAGTGGAAATACTACCAATGAAATGAAAAACTGCTCTTTCTATACCGAAACAGACATAAGAGATAAGAAGAGAAAGGAATATGCACTTTTTTATGAACTTGATATAGTACCCATAGATGAGGATAATAAGAATAAGAGTAATAATATTAGCTATTCTAGGTTAATAAGTTGCAACACCTCAGTTATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGGCCATGTACAAATGTCAGCACAGTGCAATGTACACATGGTATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGACGAGGTAGTAATTAAATCTAGCAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTAAATGAAACTGTAAAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGACATAAGACAAGCACATTGTAACATCTCTAGAGCAAACTGGACAAACACTTTAAAACAGATAGCTGAAAAATTAGGAAAACAATTTGAGGAAAATAAAACAATAGTCTTTAATCCCTCCTCAGGAGGGGACCCAGAGGTTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATTCAACACCACTGTTTAATAGTACTTGGAAGGAGACTAATGGGATTTGGACTCGTATTGGAGAGTCAAATGATAGTGCTACTATCACACTAAATGATAGTGATACTATCACACTCCAATGTAAAATAAGGCAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAAAGGACAAATTAGCTGTTTATCAAACATTACAGGGCTGCTATTAGTAAGAGATGGTGGCAATAACACGAACGGGACCGAGATCTTCAGACCTGTAGGAGGAGAAATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAGAAAAGAGCGACATTGGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTGGAAAAAATTTGGAATAATATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACCTTACTTGAAGAATCGCAGAACCAGCAAGAAAAAAATGAAAAAGAATTATTGGAATTAGATACATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAATAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAATATTACAAAGAGCTTGTAGAGCTATTCTCCATATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8456, - "end": 8548, - "orientation": "forward", - "distance": 0.6774193548387097, - "indel_impact": 0, - "protein": "RSTPQLRGDPTGPKESKEKVERETETDPVH", - "nucleotides": "AGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8457, - "end": 8732, - "orientation": "forward", - "distance": 0.3586956521739131, - "indel_impact": 0, - "protein": "DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE", - "nucleotides": "GATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8876, - "end": 9508, - "orientation": "forward", - "distance": 0.43380952380952387, - "indel_impact": 0, - "protein": "MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAATGGTCAAAAAGTAAGCTATTTGGATGGCCTGCTGTAAGGGAAAGAATGAGAAGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACACGGAGCAATCACAAGTAGCAATACACCAACTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCGGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCCAAAAAGACAAGAGATCCTTGATCTGTGGGTCTATCATACACAAGGTTTCTTCCCTGATTGGGATAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGTTAGTACCAGTGGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAATAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAGGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCGTCACGTGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092463": [ - { - "region": "gag", - "start": 801, - "end": 2312, - "orientation": "forward", - "distance": 0.21666666666666679, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAGCGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAACACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGACCAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTCAGGAACCAGAGAAAGAATGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 140, - "subtype_end": 1642, - "subtype_aminoacids": "MGARASVLSGGQLDRWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIAVLYCVHQKIEVKDTKEALEKIEEEQNKSKKKAQQAAANTENSSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWRCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQETIDKELYPLTALKSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATAGATGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTATTGGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAAGCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTAACACAGAAAACAGCAGCCAGGTTAGCCAAAATTACCCTATAGTGCAAAATATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTGGGAGAAATTTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCCGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGTCCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTGACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCCGAAATTGCAGGGCCCCTAGGAAGAAGGGCTGTTGGAGATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2105, - "end": 5116, - "orientation": "forward", - "distance": 0.10756972111553775, - "indel_impact": 0, - "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAGCAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCTATAGTAGGAGCAGAAACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGACCATGAGAAATATCACAGTAATTGGAGGGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGGTATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCAGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 1435, - "subtype_end": 4446, - "subtype_aminoacids": "FFRENLAFPQGKAREFSSEQTRADSPTSRELQVWGRDNNSLSEAGDNRQGTISFNCPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTKEAELELAENREILKETVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESEIVSQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESINKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCTATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTGAAAAAGAAAAAATCAGTAACGGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCGAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGATAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAACCTATAGTGCTGCCGGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAGTTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAAACAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTAAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTCAAACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAGCCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCGGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTGACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAAAAGACTGAGTTACAAGCAATTCACCTAGCTTTGCAGGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGATAGTCAGTCAAATAATAGAGCAGTTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAGGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAGGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTTATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTGTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAA" - }, - { - "region": "vif", - "start": 5061, - "end": 5639, - "orientation": "forward", - "distance": 0.296875, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTATTCATCCAAGAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAGGAGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATCGGTATTACTTTGATTGTTTTTCAGAATCTGCCATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 4391, - "subtype_end": 4969, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIKTWKSLVKHHMYVSKKAKGWLYRHHYQSIHPRISSEVHIPLGEASLVIKTYWGLHTGEREWHLGQGVSIEWRKGRYNTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCEYQAGHNKVGSLQYLALTALRTPKKIKPPLPSVRKLTEDRWNKPQKTKGHRESHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAAAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAGCTAAGGGATGGTTGTATAGACATCACTATCAAAGCATTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGAGGCTAGCTTGGTAATAAAGACATATTGGGGTCTGCATACAGGAGAAAGAGAATGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAGGAAGATATAACACACAAGTAGACCCAGGCCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAAGAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5579, - "end": 5869, - "orientation": "forward", - "distance": 0.31958762886597936, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTCCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAACAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAACCAGTAGATCCTAG", - "subtype_start": 4909, - "subtype_end": 5199, - "subtype_aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRTWLHGLGQYIYENYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGITLQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGACATGGCTCCACGGATTAGGGCAATATATCTATGAAAATTATGGGGACACTTGGGCAGGAGTGGAGGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGGATTGGGTGTCGACATAGCAGAATAGGCATTACTCTACAAAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5850, - "end": 6064, - "orientation": "forward", - "distance": 0.3918918918918919, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKX", - "nucleotides": "ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5180, - "subtype_end": 5394, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACNTCYCKKCCFHCQVCFTKKALGISYGRKKRRQRRRAPQDRQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACGGCTTGTAACACTTGCTATTGTAAAAAATGTTGCTTTCATTGCCAAGTTTGTTTCACAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5989, - "end": 6064, - "orientation": "forward", - "distance": 0.23076923076923084, - "indel_impact": 0, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5319, - "subtype_end": 5394, - "subtype_aminoacids": "MAGRSGDSDEELLRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6081, - "end": 6329, - "orientation": "forward", - "distance": 0.4878048780487805, - "indel_impact": 0, - "protein": "MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", - "nucleotides": "ATGCAACCTTTAGAAATATCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG", - "subtype_start": 5411, - "subtype_end": 5656, - "subtype_aminoacids": "MHSLQILGIVALVVAGIIAIVVWSIVIIEYRKILRQRKIDRLIDRIIERAEDSGNESEGDQEELSALVEMGHLAPWDIND**", - "subtype_nucleotides": "ATGCACTCTTTACAAATATTAGGAATAGTAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAG" - }, - { - "region": "env", - "start": 6244, - "end": 8826, - "orientation": "forward", - "distance": 0.4984072810011382, - "indel_impact": 0, - "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL", - "nucleotides": "ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTATGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATACCACTAGTACCAAGAATACCACCCCTAGTACCACTGCTAGTAGCGGGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGAAGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAAGGATTCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTCTTGGAAGGATGAGTCAAATGGCACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAATAATGAGAGCAACACCACCGAGATTTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAGAGAAATTGACAATTACACAAGCTTGATATACACTTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGACAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCAACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 5574, - "subtype_end": 8123, - "subtype_aminoacids": "MRVKEIRKNCQHLWRWGILLLGILMISSAAENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWTNNMAEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLRNTTNTNSTAEEMEAKGEMKNCSFNITTSIRNKLQKEYALFYKLDIVPINNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFSGNGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTDNAKTIIVQLKEPVEINCTRPNNYTRKRITMGPGRVYYTTGEIIGDIRRAHCNISSTKWNNTLGQIVKKLKEQFNNNTIVFKKSSGGDPEIVMHSFICGGEFFFCNSTKLFNSTWNSTEGNDDGEERNITLPCRIKQIVNMWQEVGKAMYAPPIGGQIRCTSNITGLLLTRDGGNQNGTNETEIFRPGGGNMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLDEIWNNMTWMQWEREINNYTGLIYTLIEESQNQQEKNELDLLQLDKWASLWNWFDITNWLWYIKIFIMIVGGLVGLRIIFTVLSIVNRVRQGYSPLSFQTHLPAPRGPDRPGGIEEEGGERDRDTSGRLVDGFLAIFWVDLRNLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVLQRVYRAILNIPTRIRQGLERALL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGGATATTAATGATTAGTAGTGCTGCAGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAAYATGTGGACAAATAACATGGCAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACTCCACTCTGTGTTACTTTAAATTGCACTGATTTGAGAAATACTACTAATACCAATAGTACCGCCGAGGAAATGGAGGCGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCACCACAAGCATAAGGAATAAGTTGCAGAAAGAATATGCACTCTTTTATAAACTTGATATAGTACCAATAAATAATGATAATACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACCCAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAGTTCAGTGGAAACGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGCTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTTACAGACAATGCTAAAACCATAATAGTACAGCTGAAAGAACCTGTAGAAATTAATTGTACAAGACCTAACAACTATACAAGGAAAAGAATAACTATGGGACCAGGGAGAGTATATTATACAACAGGAGAAATAATAGGAGATATAAGACGAGCACATTGTAACATTAGTAGCACAAAATGGAATAACACTTTAGGACAGATAGTTAAAAAATTAAAAGAACAATTTAACAATAATACAATAGTCTTTAAGAAATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTATTTGTGGAGGGGAATTTTTCTTCTGTAATTCAACAAAACTGTTTAATAGTACTTGGAATAGCACTGAAGGAAATGACGATGGAGAGGAAAGAAATATCACACTCCCATGCAGAATAAAACAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCGGAGGACAAATTAGATGCACCTCAAATATTACAGGGCTGCTATTAACAAGAGATGGAGGTAACCAAAATGGGACCAACGAGACTGAAATCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAACTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCTTAGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGATGAGATTTGGAATAACATGACCTGGATGCAATGGGAAAGAGAAATTAACAATTACACAGGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGATTTACTGCAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGATTGGTAGGTTTAAGAATAATTTTTACTGTACTTTCTATAGTGAATAGGGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTATTACAAAGAGTTTATAGAGCTATTCTCAACATACCTACAAGAATCAGACAGGGCTTGGAAAGGGCTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8408, - "end": 8500, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPTPQPRGDPTGQKESEKKVERETETDPDH", - "nucleotides": "AGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG", - "subtype_start": 7705, - "subtype_end": 7797, - "subtype_aminoacids": "RPTSQPRGDPTGPEESKKKVERETETHPDA*", - "subtype_nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAG" - }, - { - "region": "rev_exon2", - "start": 8409, - "end": 8684, - "orientation": "forward", - "distance": 0.326086956521739, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", - "nucleotides": "GACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 7706, - "subtype_end": 7981, - "subtype_aminoacids": "DPPPSPEGTRQARRNRRRRWRERQRHIRTLSGWILSNFLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTGGATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8828, - "end": 9460, - "orientation": "forward", - "distance": 0.31611374407582926, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGTAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGATTGCTGA", - "subtype_start": 8125, - "subtype_end": 8751, - "subtype_aminoacids": "MGSKWSKMSGWPAVRERMRRTKPAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEGEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEKEVLMWKFDSRLALHHMAREKHPEYYKDC*", - "subtype_nucleotides": "ATGGGTAGCAAGTGGTCAAAAATGAGTGGGTGGCCTGCTGTAAGGGAAAGAATGAGAAGAACTAAGCCAGCTGAGCCAGCAGCAGATGGAGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGGGGAGGTGGGTTTCCCAGTCAAACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACCATACACAAGGCTACTTCCCTGATTGGCAGAATTACACACCAGGGCCAGGGGTCAGATTCCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGACAAGGTAGAAGAGGCCAATGAAGGGGAAAACAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAAGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTGGCATTGCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAGGACTGCTGA" - } - ], - "OQ092465": [ - { - "region": "gag", - "start": 855, - "end": 2357, - "orientation": "forward", - "distance": 0.3900199600798404, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGGTGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAACTAGAACGGTTTGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAACAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCAACTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAATCTCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGACTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTGCCATAATGATGCAGAGAGGCAATTTCAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 200, - "subtype_end": 1699, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYRLKHVVWASRELERFAVNPGLLETSEGCRQILEQLQPSLKTGSEELKSLFNTVAVLYCVHQKIEVKDTKEALDKIEEEQNKSKKKAQQAAAGTGNSNQASQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKGFNPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSNLQEQIQWMTSNPPVPVGEIYKRWIILGLTTLVGMYSPVSILDIKQGPKDLFRDYVDRFFKTLRLEQCTQEVKGWMTDTLLVPNANPDCKTFLKALGPGPSLEELTTPGPGVGGPSHKARVLAEAMSQTTSAAVMMQKSNFKGQRRIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGRIWPSHKGRPGNFLQSRPEPSAPPEESFRFGEETTTPPQKQEPIDKELYPLASLKSLFGNDQ*SQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATGTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACATCAGAAGGCTGTAGACAAATACTGGAACAGCTACAACCATCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGATACCAAAGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGGCACAGGAAACAGCAACCAGGCCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGGCAAATGGTACATCAACCCCTATCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAGAAGGGTTTTAACCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACAACTAGTAACCTTCAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGAAATCTATAAAAGATGGATCATCCTAGGATTAACTACACTAGTAGGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGACCTTTTTAGAGACTATGTAGACCGGTTCTTTAAAACCCTAAGACTTGAGCAATGTACACAGGAAGTAAAAGGTTGGATGACAGACACCTTGTTGGTTCCAAATGCGAACCCCGATTGTAAGACCTTTTTAAAAGCTTTGGGCCCAGGGCCTTCACTTGAAGAACTGACGACCCCTGGTCCGGGAGTGGGAGGACCTAGCCATAAGGCAAGAGTTTTGGCTGAGGCAATGAGCCAAACAACAAGTGCAGCTGTAATGATGCAGAAAAGTAACTTTAAGGGCCAAAGAAGAATTGTTAAATGTTTTAATTGTGGCAAAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAA" - }, - { - "region": "pol", - "start": 2150, - "end": 5161, - "orientation": "forward", - "distance": 0.1733067729083666, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTTTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAAGACAGTTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAGACTCCTTAAGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGACAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAGCTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTTTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACCACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAAGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGTGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATACCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATACACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 1492, - "subtype_end": 4503, - "subtype_aminoacids": "FFRENLAFPQGKAREFPSEQARAISPTRRELQVWGGDNNSPSEAGADRQGTVSFGFPQITLWQRPIVTVKIEGQPKEALLDTGADDTVLEDINLPGKWKPKMIGGIGGFIKVKQYDNISIDICGHKATGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRTKVEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQCQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKIPKFKLPIQKETWETWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVPLTDTTNQKTELQAIHLALQDSGVEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEEHEKYHSNWRSMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFISNTVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAAAGATAGAGGGACAGCCAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATAACATATCCATAGACATTTGTGGACACAAGGCTACAGGTACAGTATTAGTAGGGCCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAATAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAGCTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGAATTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGCAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTAAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTAGGAAAATTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAATGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACAAATGATGTAAAACAACTAACAGAAGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGATTCCTAAATTTAAATTGCCCATACAGAAAGAGACATGGGAAACATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTTCCCCTGACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATCCATTTAGCTTTGCAGGATTCGGGAGTAGAGGTAAACATAGTAACAGACTCCCAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTATTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGAAAGATACTATTTTTAGATGGAATAGATAAGGCCCAAGAGGAACATGAGAAATATCACAGTAATTGGAGATCAATGGCTAGTGATTTTAACCTGCCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCATCAGTAACACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAGGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTGCAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGAGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5106, - "end": 5684, - "orientation": "forward", - "distance": 0.3471502590673574, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGATTTATAAGCATCACTATGACAGTATTAATCCAAAAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCAAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCGACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGGTCTCTACAGTACTTGGCACTAACAGCACTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 4448, - "subtype_end": 5026, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWNSLVKHHVYVSRKARNWVYKHHYESTHPRISSEVHIPLGDAKLVVITYWGLHTGERDWHLGQGVSIEWRKRRYSTQIDPDLADQLIHLYYFDCFSESAIRNAILGRIVRPRCDYQAGHNKVGSLQYLALTALVTPKKIKPPLPSVRKLTEDRWNKPQQIKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATGTGTATGTTTCAAGGAAAGCTAGGAATTGGGTTTATAAACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTAGTAGTAATAACTTATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAATAGACCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGACCTAGGTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5624, - "end": 5914, - "orientation": "forward", - "distance": 0.19587628865979378, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTACACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCCTTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 4966, - "subtype_end": 5256, - "subtype_aminoacids": "MEQAPADQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHYRIGCQHSRIGITRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAACTTAAGAATGAAGCTGTTAGACATTTTCCTAGACCATGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTACAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5895, - "end": 6109, - "orientation": "forward", - "distance": 0.20833333333333326, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5237, - "subtype_end": 5451, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPRTACTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRTPQDSQTHQVSLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAACACCCAGGGAGTCAGCCTAGGACTGCCTGTACCAAATGCTACTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA" - }, - { - "region": "rev_exon1", - "start": 6034, - "end": 6109, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5376, - "subtype_end": 5451, - "subtype_aminoacids": "MAGRSGDSDEELLKIVRLIKFLYQNX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA" - }, - { - "region": "vpu", - "start": 6126, - "end": 6374, - "orientation": "forward", - "distance": 0.36829268292682915, - "indel_impact": 0, - "protein": "MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", - "nucleotides": "ATGCAACCTTTAGTAATATCAGCAATAGTAGCATTAGTAGTAGTAGCGATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGGGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG", - "subtype_start": 5468, - "subtype_end": 5707, - "subtype_aminoacids": "MHSLQILAIVALVVVAIIAIVVWTIVLIEYRKILRQRRIDRIIERIRERAEDSGNESEGDQEELLVEMGHDAPWDVNDL*", - "subtype_nucleotides": "ATGCACTCTTTACAAATATTAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGGCAAAGAAGAATAGACAGGATAATTGAGAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAA" - }, - { - "region": "env", - "start": 6289, - "end": 8880, - "orientation": "forward", - "distance": 0.5747747747747753, - "indel_impact": 0, - "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGCGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTGTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACTGGTGCCAATAATACCACTAGTACCAATACTACCACCCCTAGTACCACTGTTAGTAGCGAGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGACGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAAACATAATAGTACATCTGAATAAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGAGCAGGTATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGGTGAAGGGTCAAACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTACTATTAACAAGAGATGGTGGTAACGAGAGCGAGACCACCGACACTGAGACCTTCAGACCTGTAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGATTAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAAATCTGACACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCAGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA", - "subtype_start": 5631, - "subtype_end": 8207, - "subtype_aminoacids": "MRAKGTRKNYWWKWGMMLLGMLMICNAAEQSWVTVYYGVPVWKEASTTLFCASDASAVDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNWNTTNKLNATDTNSSRIEEEMKEEMKNCSFNVTSSIGNKMQKEYALFYKLDVVPINNDSTSYTLINCNTSVITQACPKVSFEPIPIHYCTPAGFALLKCNDKKFNGTGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVIRSENFSNNAKTIIVQLNETVEITCERPNNNTRKGIHLGFGRALYATGEIIGDIRQAYCNLNRTKWENTLKRIVTKLGEQFKNQNKTITFDLSSGGDPEIMLHSFSCGGEFFYCNTTQLYNSNRKQENNGTWNNNGSNTNDNITLPCRIKQIINRWQEVGKAMYAPPITGAIRCLSNITGLLLTRDGGTRANLSETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGTIGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDEIWNNMTWMQWEKEIDNYTGVIYNLIEEAQNQQEKNEQELLQLDTWASLWNWFNITKWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPAQRGPDRPGGIEEEGGERDKDRSIRLVDGFLAIIWEDLRSLCLFSYHRLRDLLLIVTRVVELLGRRGWEALKYCWNLLQYWSQELRNSAVSLLNATAIAVAEGTDRIIEIVQIICRAILHIPRRIRQGLERALQ*", - "subtype_nucleotides": "ATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAATGCTGCAGAACAATCGTGGGTCACAGTCTACTATGGGGTGCCTGTGTGGAAAGAAGCATCCACCACCTTATTTTGTGCATCAGATGCTAGCGCAGTTGACACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAGGAAATAGTATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGATCAAATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATTGGAATACTACTAATAAGTTGAATGCTACTGATACCAATAGTAGTAGAATAGAGGAAGAAATGAAGGAAGAAATGAAAAACTGCTCTTTCAATGTCACCTCAAGCATAGGAAATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAAATAATGATAGTACAAGCTATACACTGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAACCAATTCCCATACATTATTGTACCCCAGCTGGTTTTGCGCTTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGACCATGTACAAATGTCAGTACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAAATAGTGATTAGATCTGAAAATTTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATGAAACTGTAGAAATTACTTGTGAAAGACCCAACAACAATACAAGAAAAGGTATACATCTAGGATTTGGGAGAGCATTGTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCATATTGTAACCTTAATAGAACAAAATGGGAAAACACTTTAAAACGGATAGTTACAAAATTAGGAGAACAATTTAAAAATCAAAATAAAACAATAACCTTCGATCTCTCCTCAGGAGGGGACCCAGAAATTATGCTGCACAGTTTCAGTTGTGGAGGGGAATTCTTCTACTGTAATACAACACAGCTGTATAATAGTAATAGGAAGCAGGAGAATAATGGCACTTGGAATAATAATGGGAGTAATACTAATGATAATATCACACTCCCATGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCACAGGAGCAATTAGATGTTTATCAAATATTACAGGGCTGTTATTAACAAGAGATGGTGGAACGAGAGCGAATCTGTCCGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAACAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTAAAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTAGATGAAATTTGGAACAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGGCGTAATATACAATTTAATTGAAGAAGCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGCAATTGGATACGTGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTATATAAAGATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTCTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGAACAGATAGGATTATAGAAATAGTACAAATAATTTGTAGAGCTATTCTCCACATACCTAGACGAATAAGACAGGGCTTAGAAAGAGCTTTGCAATAA" - }, - { - "region": "tat_exon2", - "start": 8462, - "end": 8554, - "orientation": "forward", - "distance": 0.6875, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGQKESKEKVERETETDPDH", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG", - "subtype_start": 7789, - "subtype_end": 7881, - "subtype_aminoacids": "RPASQPRGDPTGPEESKKKVERETRTDP*D*", - "subtype_nucleotides": "AGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAG" - }, - { - "region": "rev_exon2", - "start": 8463, - "end": 8738, - "orientation": "forward", - "distance": 0.4301075268817205, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 7790, - "subtype_end": 8065, - "subtype_aminoacids": "DPPPSPEGTRQARRNRRRRWRERQGQIHKISRWILSNHLGRPTEPVPLQLPPLERLTLDCDEGCGTSGTQGVGSPQILLESPAVLESGTKE*", - "subtype_nucleotides": "GACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAG" - }, - { - "region": "nef", - "start": 8882, - "end": 9514, - "orientation": "forward", - "distance": 0.3285714285714285, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTAAAATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAACCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8209, - "subtype_end": 8841, - "subtype_aminoacids": "MGGKWSKCSMGGWTAVRERMRRTEPATEPAADGVGAVSRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQEILDLWVYNTQGYFPDWQNYTKGPGIRYPLTFGWCFKLVPVDPEQVEKANEGENNSLLHPMSQHGMDDPEKEVLMWKFDSRLAFRHVARELHPEYYKDC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAATGTAGCATGGGTGGGTGGACTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAACTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAAACATGGAGCAATTACAAGTAGCAATACAGCAGCTACTAATGCTGACTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTGGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGAGATCCTTGATCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACAAAAGGGCCAGGGATCAGATATCCACTGACCTTCGGATGGTGCTTCAAGCTAGTGCCAGTTGATCCAGAACAGGTAGAAAAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCGCCACGTAGCCAGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA" - } - ], - "OQ092462": [ - { - "region": "gag", - "start": 767, - "end": 2269, - "orientation": "forward", - "distance": 0.2870775347912524, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAGCTAAAACATATAGTATGGGCAAGCAGGGAACTAGAGCGATTTGCAGTTAATCCCGGCCTGTTAGAAACATCGGAGGGCTGTAGACAAATACTAGGGCAACTACAGCCCGCTCTTCAGACAGGATCAGAAGAACTTAAATCACTATTTCATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTGAAAGACACCAAAGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAAGAAAAGTAAGAAAAAGGCACAGCAAGCAGCCGCTGACACAGGAAATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGACAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGATAAGGCTTTCAGTCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCAGGCAGGGCCTGTTGCGCCAGGCCAGCTACGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAGCAAATAGCATGGATGACACATAATCCACCTGTCCCAGTAGGAGAAATCTATAAAAGATGGATACTTCTGGGATTAAATAAAATAGTAAGAATGTACAGCCCCGTCAGCATTCTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCTGAGCAGGCTTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAGCAAATCCAGCTAGCATAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGCCTATTAAGTGTTTCAACTGTGGCAAAGAGGGGCATATTGCTAAAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2062, - "end": 5073, - "orientation": "forward", - "distance": 0.1494023904382471, - "indel_impact": 0, - "protein": "FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTTTTAGAAGAAATGAATTTGCCAGGAAAATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGCTGCACTTTAAATTTTCCCATTAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAGAAAGATGGTAATAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCTGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGACTTCAGGAAGTATACTGCATTTACAATACCTAGCACAAACAATGAGACACCAGGGATTAGATACCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTGGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTATATGTAGGGTCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAACTGAGACAACATCTATTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCCGATAAATGGACAGTACAGCCTATATTGCTGCCAGAAAAAGACAGCTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATCAAAGTAAGGCAGCTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTGGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAACTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATATAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGAGGTACCCACACTAATGATATAAAACAATTAACAGAGGCAGTGCAAAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAATTACCCATACAAAAGGAAACATGGGAAGCATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGTCAATACCCCTCCCTTAGTGAAACTATGGTACCAGTTGGAAAAAGAACCCATAGTGGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTTACTGACAAAGGAAGACAAAAAGTTGTCCCCCTAACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAACTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAGTTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAGTTGTGACAAATGTCAGTTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCTGGAATATGGCAGCTAGATTGTACACATCTAAAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTGAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAACAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCAGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCGTCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACCAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATCAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5018, - "end": 5596, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAGAGTTTAGTAAAACATCATATGCATATATCAAGGAAAGCTAAGAATTGGTTGTATAGACATCACTATGAAAGCATTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAGTAATAACAACATATTGGGGTCTGCTTACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAGGAGATATAGAACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCTTATTAGGACGTGTAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACCAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5536, - "end": 5826, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAACTTAAAAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTTCATGGATTGGGACAGCATATCTATGAAACATATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCGACATAGCAGAATAGGCATTAATCTACAGAGGAGAGCAAGGAATGGATCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5807, - "end": 6021, - "orientation": "forward", - "distance": 0.375, - "indel_impact": 0, - "protein": "MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKX", - "nucleotides": "ATGGATCCAGTAGATCCTAGCCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGTTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5946, - "end": 6021, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKAVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6038, - "end": 6283, - "orientation": "forward", - "distance": 0.7471264367816093, - "indel_impact": 0, - "protein": "MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL", - "nucleotides": "ATGCGACCTTTAGAAATAGCAGCAATAGTAGCACTAGTAGTAGCAGTACTAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAGTGGATAGAATAAGAGAAAGAGCAGAAGATAGTGGAAATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6201, - "end": 8777, - "orientation": "forward", - "distance": 0.6409745293466222, - "indel_impact": 33, - "protein": "MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAGTGCCACAGAGAACTTGTGGGTCACAGTCTACTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAGATGCCAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAGCCCACAAGAAGTAGTATTGAAAAATGTGACAGAAAAGTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATCAGGATATAATCAGTTTATGGGATGAAAGCCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATGCTACTATCAGTGGTAATGCAACAGAAGAAATAAAAAACTGCTCTTTCAATGTCAATACAAAAATAGGAGGTAAGAAGCAGAAAGAACGTGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAGTACTAATAGGACTAATACCAGCTATAGGTTGATAAGTTGTAACACTTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTGGAGATAAAGAGTTCAATGGAACAGGACTATGTAGAAATGTCAGCACAGTCCAATGTACACATGGAATCAGGCCAGTAGTATCAACTCAATTGCTGTTGAATGGCAGTCTAGCAGAAGGAGAGGTAGTAATTAAATCTGAAAATATCACGAACAATGTTAAAACCATAATAGTACAGCTAAATGAAACTGTATCAATTAATTGTACAAGACCTAGCAACAATACAAGAAGAAGCATACATATGGGACCAGGGAGAGCCTTTTATGCAACAGGAGAAATAATAGGAGATATAAGGAAAGCACAGTGTATCCTGAATAAGACAGACTGGAGTGACACTTTAACAAGGATAGCTAAAAAATTACACAAGCAATTTCATGGACCAATAGCATTTGAGCAATCCTCAGGAGGGGACCCTGAAATTACAATGCACACTTTTAATTGTGGAGGGGAATTTTTCTACTGCAACACATCAGCGTTGTTTAGCGGGACCTGGAATGGTACTGCTTGGACTAATGCTACTTGGGGTAATATTGCAGGTAACAATATCACACTCCAATGCAGAATAAAACAATTTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGAGAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGCAGTAACACAACAAATGGTGGCGAGAATGGGACCCAGATTGGCGAGAATGTGACCCAGATCTTCAGACCTGGAGGAGGGGATATGAGGGACAATTGGAGAAGTGAATTATACAAATATAAAGTAGTAAAAATTGAGCCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTAACATTCGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACACTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTTCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAAAAAATTTGGGGGAACATGACCTGGATGGAGTGGGAGAGAGAAATTGACAATTATACAGACTTAATATACACCTTAATTGAACAATCGCAGAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAGGCTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGTTTAGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTATCATTCCAGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGTTTGCTTAATGCTATAGCTATATCAGTAGCGGAGGGAACAGATAGGATTATAGAAGCAATACAAAGAATTTGTAGAGCTATCTTACACATACCTACAAGGATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8359, - "end": 8451, - "orientation": "forward", - "distance": 0.40625, - "indel_impact": 0, - "protein": "RPASQPRGDPTGPKEPTKKVERETETDPDH", - "nucleotides": "AGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8360, - "end": 8635, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE", - "nucleotides": "GACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8779, - "end": 9405, - "orientation": "forward", - "distance": 0.6323809523809525, - "indel_impact": 0, - "protein": "MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGGGGTCAAAAATGAGGGGATGGGTTGCTGTAAGGGAAAAAATGAGGCGAACTAAGCCAGAAGATGAGCCAGCAGCAAATGGGGTGGGGGCAGCATCTCGAGACTTGGAGAAATATGGCGCACTCACAAGTAGCAATACAGTAGCTACTAATGCTGATTTAGCTTGGCTAGAAGCACAAGAGGAAGAGGAGGTGGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTCAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGGTTCACCACACACAAGGCTATCTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATCAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATAGAGGACCCGGAGAAAGAAGTCTTAATGTGGAAGTTTGACAGCCACCTAGCATTCCGTCACATGGCCCGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "OQ092464": [ - { - "region": "gag", - "start": 773, - "end": 2275, - "orientation": "forward", - "distance": 0.4019960079840321, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAGCCAGCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCGCTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTAAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCTCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 200, - "subtype_end": 1699, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYRLKHVVWASRELERFAVNPGLLETSEGCRQILEQLQPSLKTGSEELKSLFNTVAVLYCVHQKIEVKDTKEALDKIEEEQNKSKKKAQQAAAGTGNSNQASQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKGFNPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSNLQEQIQWMTSNPPVPVGEIYKRWIILGLTTLVGMYSPVSILDIKQGPKDLFRDYVDRFFKTLRLEQCTQEVKGWMTDTLLVPNANPDCKTFLKALGPGPSLEELTTPGPGVGGPSHKARVLAEAMSQTTSAAVMMQKSNFKGQRRIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGRIWPSHKGRPGNFLQSRPEPSAPPEESFRFGEETTTPPQKQEPIDKELYPLASLKSLFGNDQ*SQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATGTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACATCAGAAGGCTGTAGACAAATACTGGAACAGCTACAACCATCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGATACCAAAGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGGCACAGGAAACAGCAACCAGGCCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGGCAAATGGTACATCAACCCCTATCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAGAAGGGTTTTAACCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACAACTAGTAACCTTCAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGAAATCTATAAAAGATGGATCATCCTAGGATTAACTACACTAGTAGGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGACCTTTTTAGAGACTATGTAGACCGGTTCTTTAAAACCCTAAGACTTGAGCAATGTACACAGGAAGTAAAAGGTTGGATGACAGACACCTTGTTGGTTCCAAATGCGAACCCCGATTGTAAGACCTTTTTAAAAGCTTTGGGCCCAGGGCCTTCACTTGAAGAACTGACGACCCCTGGTCCGGGAGTGGGAGGACCTAGCCATAAGGCAAGAGTTTTGGCTGAGGCAATGAGCCAAACAACAAGTGCAGCTGTAATGATGCAGAAAAGTAACTTTAAGGGCCAAAGAAGAATTGTTAAATGTTTTAATTGTGGCAAAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAA" - }, - { - "region": "pol", - "start": 2068, - "end": 5079, - "orientation": "forward", - "distance": 0.1733067729083666, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAATACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAGAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGTAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAGACTACCCATACAAAGAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGACGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAGAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 1492, - "subtype_end": 4503, - "subtype_aminoacids": "FFRENLAFPQGKAREFPSEQARAISPTRRELQVWGGDNNSPSEAGADRQGTVSFGFPQITLWQRPIVTVKIEGQPKEALLDTGADDTVLEDINLPGKWKPKMIGGIGGFIKVKQYDNISIDICGHKATGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRTKVEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQCQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKIPKFKLPIQKETWETWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVPLTDTTNQKTELQAIHLALQDSGVEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEEHEKYHSNWRSMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFISNTVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGCCATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCAACGACCAATAGTCACAGTAAAGATAGAGGGACAGCCAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATAACATATCCATAGACATTTGTGGACACAAGGCTACAGGTACAGTATTAGTAGGGCCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAATAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAGCTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGAATTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGCAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTAAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTAGGAAAATTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAATGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACAAATGATGTAAAACAACTAACAGAAGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGATTCCTAAATTTAAATTGCCCATACAGAAAGAGACATGGGAAACATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTTCCCCTGACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATCCATTTAGCTTTGCAGGATTCGGGAGTAGAGGTAAACATAGTAACAGACTCCCAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTATTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGAAAGATACTATTTTTAGATGGAATAGATAAGGCCCAAGAGGAACATGAGAAATATCACAGTAATTGGAGATCAATGGCTAGTGATTTTAACCTGCCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCATCAGTAACACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAGGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTGCAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGAGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5024, - "end": 5602, - "orientation": "forward", - "distance": 0.30051813471502586, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTACTCATCCAAGAATAAGTTCAGAAGTTCACATCCCGCTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAGAAGAGATATAGCACACAAGTAGACCCTGGCTTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGCTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG", - "subtype_start": 4448, - "subtype_end": 5026, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWNSLVKHHVYVSRKARNWVYKHHYESTHPRISSEVHIPLGDAKLVVITYWGLHTGERDWHLGQGVSIEWRKRRYSTQIDPDLADQLIHLYYFDCFSESAIRNAILGRIVRPRCDYQAGHNKVGSLQYLALTALVTPKKIKPPLPSVRKLTEDRWNKPQQIKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATGTGTATGTTTCAAGGAAAGCTAGGAATTGGGTTTATAAACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTAGTAGTAATAACTTATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAATAGACCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGACCTAGGTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5542, - "end": 5832, - "orientation": "forward", - "distance": 0.2268041237113403, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 4966, - "subtype_end": 5256, - "subtype_aminoacids": "MEQAPADQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHYRIGCQHSRIGITRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAACTTAAGAATGAAGCTGTTAGACATTTTCCTAGACCATGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTACAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5813, - "end": 6027, - "orientation": "forward", - "distance": 0.20833333333333326, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5237, - "subtype_end": 5451, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPRTACTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRTPQDSQTHQVSLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAACACCCAGGGAGTCAGCCTAGGACTGCCTGTACCAAATGCTACTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA" - }, - { - "region": "rev_exon1", - "start": 5952, - "end": 6027, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLQTVRLIKLLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA", - "subtype_start": 5376, - "subtype_end": 5451, - "subtype_aminoacids": "MAGRSGDSDEELLKIVRLIKFLYQNX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACA" - }, - { - "region": "vpu", - "start": 6044, - "end": 6289, - "orientation": "forward", - "distance": 0.4841463414634146, - "indel_impact": 0, - "protein": "MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL", - "nucleotides": "ATGCAACCTTTACACATAGCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAG", - "subtype_start": 5468, - "subtype_end": 5707, - "subtype_aminoacids": "MHSLQILAIVALVVVAIIAIVVWTIVLIEYRKILRQRRIDRIIERIRERAEDSGNESEGDQEELLVEMGHDAPWDVNDL*", - "subtype_nucleotides": "ATGCACTCTTTACAAATATTAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGGCAAAGAAGAATAGACAGGATAATTGAGAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAA" - }, - { - "region": "env", - "start": 6207, - "end": 8789, - "orientation": "forward", - "distance": 0.546485260770976, - "indel_impact": 0, - "protein": "MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATAATACCACTAGTACCAATGATACCACTAGTACCACTACTAGTAGCGGGGAAAAGATGAAGGAAGGAGAGATAAAAAACTGCTCTTTCAATATCACCACAAGCATAAGAGATAAGGTGCAGAAAGAATATGCACTCTTTTATAAACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACGATAAGAAGTTCAATGGAACAGGATCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAATGAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAAGACAATAATCTTTACTCACTCCTCAGGAGGGGACCCAGAAGTTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGATGAAAGGTCAAATGACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGATACATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAACGAGAGCAACACCACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAAAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGGATAACATGACCTGGATGGAATGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACTTTAATTGAAAAATCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 5631, - "subtype_end": 8207, - "subtype_aminoacids": "MRAKGTRKNYWWKWGMMLLGMLMICNAAEQSWVTVYYGVPVWKEASTTLFCASDASAVDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNWNTTNKLNATDTNSSRIEEEMKEEMKNCSFNVTSSIGNKMQKEYALFYKLDVVPINNDSTSYTLINCNTSVITQACPKVSFEPIPIHYCTPAGFALLKCNDKKFNGTGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVIRSENFSNNAKTIIVQLNETVEITCERPNNNTRKGIHLGFGRALYATGEIIGDIRQAYCNLNRTKWENTLKRIVTKLGEQFKNQNKTITFDLSSGGDPEIMLHSFSCGGEFFYCNTTQLYNSNRKQENNGTWNNNGSNTNDNITLPCRIKQIINRWQEVGKAMYAPPITGAIRCLSNITGLLLTRDGGTRANLSETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGTIGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDEIWNNMTWMQWEKEIDNYTGVIYNLIEEAQNQQEKNEQELLQLDTWASLWNWFNITKWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPAQRGPDRPGGIEEEGGERDKDRSIRLVDGFLAIIWEDLRSLCLFSYHRLRDLLLIVTRVVELLGRRGWEALKYCWNLLQYWSQELRNSAVSLLNATAIAVAEGTDRIIEIVQIICRAILHIPRRIRQGLERALQ*", - "subtype_nucleotides": "ATGAGAGCGAAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATGATCTGTAATGCTGCAGAACAATCGTGGGTCACAGTCTACTATGGGGTGCCTGTGTGGAAAGAAGCATCCACCACCTTATTTTGTGCATCAGATGCTAGCGCAGTTGACACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAGGAAATAGTATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGATCAAATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATTGGAATACTACTAATAAGTTGAATGCTACTGATACCAATAGTAGTAGAATAGAGGAAGAAATGAAGGAAGAAATGAAAAACTGCTCTTTCAATGTCACCTCAAGCATAGGAAATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAAATAATGATAGTACAAGCTATACACTGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAACCAATTCCCATACATTATTGTACCCCAGCTGGTTTTGCGCTTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGACCATGTACAAATGTCAGTACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAAATAGTGATTAGATCTGAAAATTTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATGAAACTGTAGAAATTACTTGTGAAAGACCCAACAACAATACAAGAAAAGGTATACATCTAGGATTTGGGAGAGCATTGTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCATATTGTAACCTTAATAGAACAAAATGGGAAAACACTTTAAAACGGATAGTTACAAAATTAGGAGAACAATTTAAAAATCAAAATAAAACAATAACCTTCGATCTCTCCTCAGGAGGGGACCCAGAAATTATGCTGCACAGTTTCAGTTGTGGAGGGGAATTCTTCTACTGTAATACAACACAGCTGTATAATAGTAATAGGAAGCAGGAGAATAATGGCACTTGGAATAATAATGGGAGTAATACTAATGATAATATCACACTCCCATGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCACAGGAGCAATTAGATGTTTATCAAATATTACAGGGCTGTTATTAACAAGAGATGGTGGAACGAGAGCGAATCTGTCCGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAACAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTAAAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTAGATGAAATTTGGAACAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGGCGTAATATACAATTTAATTGAAGAAGCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGCAATTGGATACGTGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTATATAAAGATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTCTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGAACAGATAGGATTATAGAAATAGTACAAATAATTTGTAGAGCTATTCTCCACATACCTAGACGAATAAGACAGGGCTTAGAAAGAGCTTTGCAATAA" - }, - { - "region": "tat_exon2", - "start": 8371, - "end": 8463, - "orientation": "forward", - "distance": 0.59375, - "indel_impact": 0, - "protein": "RPTSQPRGDPTGQKESKKKVERETETDPDH", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAG", - "subtype_start": 7789, - "subtype_end": 7881, - "subtype_aminoacids": "RPASQPRGDPTGPEESKKKVERETRTDP*D*", - "subtype_nucleotides": "AGACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAG" - }, - { - "region": "rev_exon2", - "start": 8372, - "end": 8647, - "orientation": "forward", - "distance": 0.3978494623655915, - "indel_impact": 0, - "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE", - "nucleotides": "GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG", - "subtype_start": 7790, - "subtype_end": 8065, - "subtype_aminoacids": "DPPPSPEGTRQARRNRRRRWRERQGQIHKISRWILSNHLGRPTEPVPLQLPPLERLTLDCDEGCGTSGTQGVGSPQILLESPAVLESGTKE*", - "subtype_nucleotides": "GACCCGCCTCCCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAGGAATAG" - }, - { - "region": "nef", - "start": 8791, - "end": 9411, - "orientation": "forward", - "distance": 0.3542857142857141, - "indel_impact": 0, - "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", - "nucleotides": "ATGGGTGGTAAGTGGTCAAAGAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGATGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGGTATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGGTTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8209, - "subtype_end": 8841, - "subtype_aminoacids": "MGGKWSKCSMGGWTAVRERMRRTEPATEPAADGVGAVSRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQEILDLWVYNTQGYFPDWQNYTKGPGIRYPLTFGWCFKLVPVDPEQVEKANEGENNSLLHPMSQHGMDDPEKEVLMWKFDSRLAFRHVARELHPEYYKDC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAATGTAGCATGGGTGGGTGGACTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAACTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAAACATGGAGCAATTACAAGTAGCAATACAGCAGCTACTAATGCTGACTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTGGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGAGATCCTTGATCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACAAAAGGGCCAGGGATCAGATATCCACTGACCTTCGGATGGTGCTTCAAGCTAGTGCCAGTTGATCCAGAACAGGTAGAAAAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCGCCACGTAGCCAGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA" - } - ], - "OQ092467": [ - { - "region": "gag", - "start": 808, - "end": 2307, - "orientation": "forward", - "distance": 0.2507968127490039, - "indel_impact": 0, - "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAAAAACAATACAAATTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTCTAGAGACATCAGAAGGGTGTAGACAAATACTGGGACAGCTACAACCAGCTCTTCAGACAGGATCAGAAGAATTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAAATAGAGGAAGAGCAAAACCAAAGTAAGAAAAAAGCAGCAGCTGCAGCAGCTGACACAGGAAACAGAAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCTTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAATAGGTGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCAGGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATTCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTGAGAGCCGAGCAAGCAACACAGGAAGTAAAGAATTGGATGACAGAAACTTTGCTGGTCCAAAATGCAAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCAGGCCACAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAACTGTAATGATGCAGAGAGGCAATTTTAGGAATCAAAGAAAGACAGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCATATAGCAAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAAGAAGGGCACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2100, - "end": 5111, - "orientation": "forward", - "distance": 0.14342629482071723, - "indel_impact": 0, - "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAAATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGATATGAATTTACCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCCTATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGGATATTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTGGGATCTGACTTAGAAATAGGACAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGATAGCTGGACTGTCAATGACATCCAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACTAAAGCACTAACAGAAGTAGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCAACGAAAGACCTAATAGCAGAACTACAGAAGCAGGGGCAAAGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTGAAACAATTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAAATTACCTATACAAAAAGAAACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGATTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAGATCAGGCAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCCCCCTAACAGACACAACAAACCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCACTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGGAAGATACTATTTTTAGATGGAATAACTAAGGCCCAAGATGATCATGAGAGATACCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCACGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAACTAGATTGTACGCATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTAAAACTAGCAGGAAGATGGCCAGTAAAGACAGTACATACAGATAATGGCAGCAATTTCACCAGTGCTGCGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAAAGTCAAGGAGTCATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAGAAACAAATCACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAGGCAAAGATCATTAGAGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5056, - "end": 5634, - "orientation": "forward", - "distance": 0.44041450777202074, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATACATGTCTCAAAGAAAGCTAAGGGATGGGTTTATAGACACCACTATGAAAGCACCCATCCAAGAATAAGTTCAGAAGTACATATCCCGCTAGGGGAAGCTAGATTAGTAATAGCAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAAGAAATATATCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGCATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCATAGTAGGACGTTTAGTTAGCCCTCAGTGTGAATATCAAACAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGTAGCATTAATAACGCCAAAAAAGAGAAAGCCACCTTTGCCTAGTGTTAGGAAATTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5574, - "end": 5864, - "orientation": "forward", - "distance": 0.28125, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAGAGCTTTTAGAGGAACTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAACTTAGGACAATACATCTATGCAACTTATGGGGATACTTGGACAGGAGTGGAAGCTTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGGAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5845, - "end": 6059, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5984, - "end": 6059, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKQLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6076, - "end": 6339, - "orientation": "forward", - "distance": 0.6090909090909091, - "indel_impact": 0, - "protein": "MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL", - "nucleotides": "ATGCAACCCTTAGTAATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAGAAAGATAGATAGCATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6239, - "end": 8806, - "orientation": "forward", - "distance": 0.5505630630630636, - "indel_impact": 0, - "protein": "MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL", - "nucleotides": "ATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAGTGCTAAAGAACAATTGTGGGTCACAGTTTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAAATGCTAAAGCATATGACCCAGAGGTGCATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAAGAAGTACCATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGACATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTATTTTAAATTGCACTAATGTGAATGTTACTACTAACAATAATAGTAGTAGTGAGGAACAGATGGAGGTAGGAGAAATAAAAAACTGCTCTTTCAATATTGCCACAAGAATAAAAAATAAGATAAAGAAAGAATATGCACTTTTTAATAGACTTGATGTAGTACCAATAGAGGATGATAATACAAGCTATATGTTGATAAATTGTAATACCTCAGTCACTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATACTAAAATGTAATGATAAAAAGTTCAATGGAACAGGACCATGTAACAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGATAGTAGTTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTGAATAAAACTATAGAAATTAATTGTATAAGACCCAACAATAATACAAGAAAAAGTATATCTTTAAGACCGGGGCAAGCAATTTATGCAACAGAAGACATAATAGGAAATATAAGACAAGCACATTGTAACATTAGGAGAAAAGACTGGGATAAAGCTTTAGAACAGGTAGTTGCAAAATTAAGAGAACAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGAGACCCAGAAATTGTAATGCATAGTTTTAATTGTGCAGGGGAATTTTTCTACTGTAACACAACAAAGCTGTTTAATAGTACTTGGAATGTTAATAACACTCGGAATAATACTACTGATAATAGCACCATCACTCTCCCGTGCAGAATAAAACAAATTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCTCCTCCCATCAAAGGGCAAATTAAATGTTCATCAAATATTACAGGGTTATTATTAACAAGAGATGGTGGTGTCCGCGAGGACAACGCCCCTGAGATCTTTAGACCTGGAGGAGGAGATATGAGGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTGGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAAAGAGGAAAAAGAGCAGTAACGCTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACTTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTACCTTGGAATACTAGTTGGAGTAATAAATCTTTGGAAAAGATTTGGAAAAACATGACCTGGATGGAGTGGGAGAAAGAAATTAACAATTACACAAGGACAATATACACCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAGGAATTATTGGAATTGGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATCATGATAGTAGGAGGTATAGTAGGGTTAAGAATAGTTTTTACTGTGCTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTGTCATTCCAGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAATTGGTAGAGCTATTCTCCACATCCCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8388, - "end": 8480, - "orientation": "forward", - "distance": 0.5806451612903225, - "indel_impact": 0, - "protein": "RPASQLRGDQTGPKEQKKKVERETETDPGN", - "nucleotides": "AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8389, - "end": 8664, - "orientation": "forward", - "distance": 0.3913043478260869, - "indel_impact": 0, - "protein": "DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC", - "nucleotides": "GACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8808, - "end": 9416, - "orientation": "forward", - "distance": 0.583333333333333, - "indel_impact": 0, - "protein": "MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAATGTAGTCTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCTCCAGCAGCAGAAGGGGTGGGAGCAGCATCTCGAGACTTGGAAAAACATGGAGCACTCACAACTAGTAATACAGCAGCTAATAATGCTGCTTGTGCCTGGCTGGAAGCACAAGAGGAGGAAGAGGTGGGGTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTATGGATCTTGGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCCAAAAAGACAAGAAATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTATACACCAGGGCCAGGGACTAGATATCCATTAACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATGAAGGAGAGAATAGCTGTTTGCTACACCCGATGAACCAGCATGGGGCAGATGACACAGAAAGAGAAGTATTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAAGGCCCGAGAGCTGCATCCGGAGTACTACAAAAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ] -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-large/subtypes.fasta b/tests/expected-results-large/subtypes.fasta index 94c83ea..569efb5 100644 --- a/tests/expected-results-large/subtypes.fasta +++ b/tests/expected-results-large/subtypes.fasta @@ -161,454 +161,3 @@ CTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGG TCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTG CTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGT GACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCA ->Ref.47_BF.ES.08.P1942.GQ372987 -AGGGTTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCATTTAGTCAG -TGTGGAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCCGAAAGCGAAAGTAGAACCAG -AGAAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCTCGGCAAGAGGCGAGGGGC -GGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGT -GCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGG -CCAGGAGGAAAGAAAACATATAAATTGAAACATATAGTATGGGCAAGCAGGGAGCTAGAA -CGATTCGCTCTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGA -CAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATTTAATGCAGTA -GCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGAT -AAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCTGACACAGGA -AATAACAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACGCCCAGGGGCACATGATA -CATCAGCCTATTTCACCTAGAACTTTAAATGCATGGGTAAAGGTGGTAGAAGAAAAGGCT -TTTAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGATGCACCCCACAAGAT -TTAAACACCATGTTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGCTAAAAGAC -ACCATTAATGAGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATC -CCACCAGGCCAGATGAGGGAACCTAGGGGAAGTGATATAGCTGGAACTACCAGTACCCTT -CAGGAACAAATACAATGGATGACAAGCAACCCACCTGTCCCAGTGGGAGATATCTATAAA -AGATGGATCATCCTAGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTG -GACATAAAACAAGGGCCAAAAGAACCCTTTAGAGATTATGTGGATAGGTTCTTTAAAGTC -CTAAGAGCCGAGCAAGCTACACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTC -CAAAATTCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCACAGGCTACACTA -GAAGAAATGATGACAGCATGCCAAGGAGTGGGAGGGCCCGGCCATAAAGCAAGAGTTTTG -GCAGAAGCAATGAGCCAAGCAACAGCTTCAAATGTCATAATGATGCAGAAAGGCAATTAT -AGGGGCCAGAGAAAGATTGTCAAGTGTTTCAATTGTGGCAGAGAAGGACACATAGCCAAA -AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAGTGTGGAAAAGAAGGACACCAAATG -AAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAAAATTTGGCCTTCCAGCAAGGGG -AGGCCAGGGAATTTTCTCCAGAACAGGCCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTG -GGGTTTGGAGAGGAGACAACCCCCTCTCCGAAGCAGGAACAGAAAGAGGGACTGTATCCT -CCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTAGTCACAGTAAAAGTAGGGG -GACAGATGAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATAA -ATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAA -GACAGTATGATCAAATAATCATAGAAATTTGTGGAAAGAAAGCTATAGGTACAGTATTAG -TAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTT -TAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTGAAGCCAGGAATGGATG -GCCCAAAGGTCAAACAATGGCCATTAACGGAAGAAAAAATAAAAGCATTAATAGAAATCT -GTACAGAAATGGAAAAGGAAGGGAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATA -CTCCAGTATTTGCCATAAAGAAAAAAGACAGTACAAAATGGAGAAAATTAGTAGATTTTA -GAGAACTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCGCATCCTG -CAGGATTAAAAAAGAAAAAATCAGTAACAGTATTAGATGTGGGAGATGCATATTTTTCAG -TTCCCTTATATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATG -AGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAG -CAATATTTCAAAGCAGCATGACGAAAATCTTAGAGCCTTTTAGAAAACAGAATCCAGACA -TAGTGATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGC -ATAGAACAAAAATAGAGGAACTGAGACAACATCTACTGAAATGGGGGCTTACTACACCAG -ACAAAAAACATCAGAAAGAACCTCCCTTCCTTTGGATGGGTTATGAACTACATCCTGATA -AATGGACAGTACAGCCTATAGTGCTGCAAGAAAAGGACAACTGGACTGTCAATGACATAC -AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGAC -AATTATGCAGACTCCTTAGGGGAACCAAGGCACTAACAGACATAGTACCACTAACAAAAG -AGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGGGTGT -ATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGA -CATATCAAATCTATCAGGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGGTCAA -GGGGTGCCCACACTAATGATGTAAGACAGTTAACAGAGGCAGTGCAAAAAATAACCACAG -AAAGCATAGTAATATGGGGGAAAACTCCTAAATTTAGACTACCCATACAAAGAGAGACAT -GGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCA -ATACCCCCCCTCTAGTAAAGTTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAG -AAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATG -TTACTAATAAAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACCAATCAGAAGACTG -AGTTACAAGCAATCCTTCTAGCTTTACAGGATTCAGGATTAGAAGTAAACATAGTAACAG -ACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAATTAG -TCAATCAAATAATAGAGCAATTAATAAACAAGGAAAAGGTCTACCTGTCATGGGTACCAG -CACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGA -AAGTACTATTTTTAGATGGGATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAATA -ATTGGAGGGCAATGGCCAGTGATTTTAACATCCCACCTGTGGTAGCAAAAGAGATAGTAG -CCAGCTGTGATAAATGTCAGCAAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTC -CAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTC -ATGTGGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAG -CATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATG -GCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGG -AATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAGGAAT -TAAAGAAAATTATAGGACAGATAAGAGATCAGGCTGAACATCTTAAGACAGCAGTGCAAA -TGGCAGTATTCATCCACAATTTTAAAAGAAGAGGGGGGATTGGGGGGTACAGTGCAGGGG -AAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAAAAACAAATTA -CAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCAGTTTGGAAAGGAC -CAGCAAAACTACTCTGGAAAGGTGAAGGGGCAGTGGTCATACAAGACAATAGTGAAATAA -AAGTAGTGCCAAGAAGAAAAGCAAAGATAATTAGGGATTATGGAAAACAGATGGCAGGTG -ATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGCAAATGGAAAAGTTTAGTTAAATAC -CATATACATACTTCAAAGAAAGCCAAAAAATGGTTCTATCGACATCACTTTGAAAGCAGG -CATCCAAAAATAAGCTCAAAAGTACACATCCCWCTAGAGGAAGAAAATAAATTAGTAGTA -ACAACATATTGGGGTCTGAATACAGGGGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCC -ATAGAATGGAGGCAGGGGAAGTATAGGACACAAATAGACCCTGGCCTGGCAGACCAACTG -ATTCATATATATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAAAGCCATATTAGGA -CATAGAGTTAGCCCTAGGTGTAACTATCAAGCAGGACATAACAAGGTAGGATCTCTACAA -TATTTGGCACTAACAGCATTAATAGCTACAAAGAAGGCAAAGCCGCCTTTGCCTAGTGTC -CAGAAATTAGTARRAGACAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGC -CATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAAAATGAAGCTGTCAGACATT -TTCCTAGGCCATGGCTCCATGGCTTAGGACAACATATCTACAACACCTATGGGGATACTT -GGGAGGGAGTTGAAGCTATAATAAGGATGCTGCAACATCTACTGTTTATCCATTTCAGAA -TTGGGTGCAATCATAGCAAAATAGGCATTATTCGACAGAGAAGAACAAGAAATGGAGCCA -GTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAGGACTGCGTGTACC -AAATGTTATTGTAAGAGATGTTGCTTTCATTGCCAAGTTTGCTTCATAACAAAAGGCTTA -GGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAARACCTCCTCAAAGCGGTCAG -ACTCATCAAGATTCTCTATCAAAGCAGTGAGTAGCATATGTAATGCAATCTTTAGAGATA -TTAGCAATAGTAGCACTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGTCTATAGTA -CTCATAGAATATAGGAAAATATTAAGAGAAAGAAAAGTATATAAATTAATTGACAGAATA -AGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGAGATCAAGAAGAATTATCAGCA -ATGGTGGAAAGGGGGCATCTTGCTCCTTGGGATATTAATGATCTGTAATGCTGAAAAGTC -TGAAAAGTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCAC -TCTATTTTGTGCATCAGATGCTAAGGCATATGATACAGAGGCACATAATGTTTGGGCCAC -ACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGTTATTGAAAAATGTGACAGA -AAATTTTAACATGTGGAAAAACAACATGGTAGATCAGATGCATGAAGATATAATTAGTTT -ATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACACCACTTTGTGTTACCTTAAATTG -CAATAATACTGTCACCACTAATGCTAGCATGAATAATAGTGGAGAAATGAAAAATTGCTC -TTTCAATATCACCACCCAAACGAGAGGGAGAAAGAGAGAATATGCACTTTTTTATAACCT -TGATGTAGTGCAATTAGAATCAGACAAAACTAGTACTAGCTATAGGTTGATAAGTTGTAA -CACCTCAGTCATTACACAGGCTTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTA -TTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGCAGTTCAATGGAACAGG -ACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAAC -TCAATTGCTGTTAAATGGCAGCCTAGCAGAAGAAGATATAATAATCAGGTCTCAAAATAT -CTCAGATAATGCAAAAAGCATAATAGTACAGTTGAATGAATCTATAAGCATTAATTGTAT -AAGACCCGGCAATAATACAAGAAAAAGTATACATATGGGACCAGGCAAGGTATTTTATGC -AACAGGAGATATAATAGGAAATATAAGACAAGCACATTGTAACATTAGTAAAGCAAAATG -GAATAACACTTTAAGACAGATAGCCAGAAAATTAGGAGAACAATTTAACAATAAAACAAT -AGTCTTTAATCAATCCTCAGGAGGGGACCCAGAAATTGTAATGCATACTTTTAACTGTGG -AGGGGAATTTTTCTACTGTAATACAACATCACTGTTTAATAGTACATGGGAGAATGATAC -AAATATTACTGAAGAATCAAATAGCTCAGATGACACAATCACACTCCAATGCAAAATAAA -ACAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGG -ATACATTAACTGTTCATCAAATATCACAGGGCTGATATTAGTAAGAGATGGTGGTAATAA -CAGAACAAGTGAGAGTGAGACCTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAG -AAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAA -GGCAAAGAGAAGAGTGGTGCAGAGACAAAAAAGAGCAGTGGGATTTGGAGCTTTGTTCCT -TGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGGCGCTGACGGTACA -GGCCAGACTATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGA -GGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGT -CCTGGCTGTGGAAAGATACCTAAAGGATCAACAGATCATGGGGATTTGGGGTTGCTCTGG -AAAATACATCTGCACCACTGCTGTGCCTTGGAATACTAGCTGGAGTAATAAATCTTATGA -TCAGATTTGGAAGAACATGACCTGGATGCAGTGGGAAAAAGAAATTGATAATTACACAAG -TGAAATATACAGCTTAATTGCACTATCGCAAGACCAGCAAGAAAAGAATGAACAAGAATT -ATTGGAATTGGACAAATGGGCAAGCTTGTGGAATTGGTTTGACATATCAAACTGGCTGTG -GTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGATTAAGAATAGTTTTTGC -AATACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTCCAGACCCA -CCACCCAGCTCCGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAG -AGACAGAGACAGATCCGGTCGATCGGCGAGCGGATTCTTAACACTTATCTGGATCGACCT -ACGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGCAGCGAG -GATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATACTGGTGGAACCTCCT -ACAGTATTGGAGTCAGGAACTAAAGAATAGTGCCATTAGCTTGCTTAATACCACAGCAAT -AGTAGTAGCTGAGGGGACAGATAGAATTATAGAAGCTTTGCAAAGTGCTGGTAGAGCTGT -TCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTACTATAAAATGGGAA -ACAAGTGGTCAAAAAGTAGTATAGTTGGATGGCCTACTATAAGGGAAAGAATAAGACGAA -CCCCTCCAATAGCAGAAGGGGTGGGAGCAGTCTCTCGAGACCTAGGAAAGCATGGAGCAA -TCACAAGTAGCAACACAGCAGCTAATAATCCTGACTTGGCCTGGCTGGAAGCACAGGAGG -GTGAGGAAGTAGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAGG -GAGCTTTCGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATT -CCAGGAAAAGACAAGAGATCCTTGATCTATGGGTTTACCACACACAAGGATACTTCCCTG -ATTGGCAGAACTACACACCAGGGCCAGGGGTCAGGTATCCATTGACCTTTGGGTGGTGCT -TCAAGCTAGTACCAGTTGACCCAGAGGAGGTAGAAAAGGCCAATGAAGGAGAAAACAACT -GCTTGCTACACCCCATGAGCCAACATGGAATGGAGGATGAAGACAGAGAAGTACTGATGT -GGAAGTTTGACAGACACCTAGCATCTAAGCACGTAGCCCGAGAGCTACATCCGGAGTATT -ACAAGGACTGCTGACATCGAGCTTTCTACAAGGGACTTTCCACTGGGGACTTTCCGAGGG -AGGTGTGGCCTGGGCGGGACAGGGGAGTGGCGAGCCCTCAGATTGC ->Ref.B.TH.90.BK132.AY173951 -GGACCTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGA -AGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAG -CGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAG -ATAGATGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATA -TAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTATTGGAAACAT -CAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAAGCCTTCAGACAGGATCAGAAG -AACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAGAGG -TAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAA -AGGCACAGCAAGCAGCAGCTAACACAGAAAACAGCAGCCAGGTTAGCCAAAATTACCCTA -TAGTGCAAAATATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATG -CATGGGTAAAAGTAGTAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAG -CATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGAC -ATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATA -GATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAA -GTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATC -CACCTATCCCAGTGGGAGAAATTTACAAAAGATGGATAATCCTGGGATTAAATAAAATAG -TAAGGATGTATAGCCCTACCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTA -GAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCCGAGCAAGCCTCACAGGAGGTAA -AAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTT -TAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGG -GAGGTCCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTGACAAATTCAG -CTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCA -ATTGTGGCAAAGAAGGGCACATAGCCCGAAATTGCAGGGCCCCTAGGAAGAAGGGCTGTT -GGAGATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTT -TAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAG -AGCCGACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTC -AGAAGCAGGAGACAATAGACAAGGAACTATATCCTTTAACTGCCCTCAAATCACTCTTTG -GCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATAC -AGGGGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAAT -GATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACTTGTAGAAAT -CTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGG -AAGAAATTTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCTATTAGTCCTATTGAAAC -TGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGAC -AGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAAT -TTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA -CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTT -CTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTGAAAAAGAAAAAATCAGTAAC -GGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATA -TACTGCATTTACCATACCGAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAA -TGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAAT -CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTT -GTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACA -ACATCTGTTGAGGTGGGGATTTACCACACCAGATAAAAAACATCAGAAAGAACCTCCATT -CCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAACCTATAGTGCTGCC -GGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC -AAGTCAGATTTACCCAGGGATTAAAGTAAAGCAGTTATGTAAACTCCTTAGGGGAACCAA -AGCACTAACAGAAGTAGTACCACTAACAAAAGAGGCAGAGCTAGAACTGGCAGAAAACAG -GGAAATTCTAAAAGAAACAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGC -AGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAA -AAATCTAAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA -ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCC -TAAATTCAAACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCA -AGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTA -CCAGTTAGAAAAAGAGCCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCGGCTAA -CAGGGAGACTAAATTAGGAAAAGCAGGATATGTGACTAACAGAGGAAGACAAAAAGTTGT -CTCCCTAACTGACACAACAAATCAAAAGACTGAGTTACAAGCAATTCACCTAGCTTTGCA -GGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCA -AGCACAACCAGATAAAAGTGAATCAGAGATAGTCAGTCAAATAATAGAGCAGTTAATACA -AAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAGGGAATTGGAGGAAATGAACA -AGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTCTAGATGGAATAGATAA -GGCCCAAGAAGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAA -CCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGG -AGAAGCCATGCATGGACAGGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACA -TTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGA -AGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAG -ATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAA -GGCTGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATCCCCTACAATCCCCAAAG -TCAAGGAGTAGTAGAATCTATAAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGA -TCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTTATCCACAATTTTAAAAG -AAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGA -CATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTA -CAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTGTGGAAAGGTGAAGG -GGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGAT -CATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGA -GGATTAAAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAGCTAAGG -GATGGTTGTATAGACATCACTATCAAAGCATTCATCCAAGAATAAGTTCAGAAGTACATA -TCCCACTAGGAGAGGCTAGCTTGGTAATAAAGACATATTGGGGTCTGCATACAGGAGAAA -GAGAATGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAGGAAGATATAACACAC -AAGTAGACCCAGGCCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAG -AATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAG -CAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAAGAACACCAA -AGAAGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGC -CCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGA -GGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGACATGGCTCCACGGATTAGGGCA -ATATATCTATGAAAATTATGGGGACACTTGGGCAGGAGTGGAGGCCATAATAAGAATTCT -GCAACAACTGCTGTTTATTCATTTCAGGATTGGGTGTCGACATAGCAGAATAGGCATTAC -TCTACAAAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATC -CAGGAAGTCAGCCTAAGACGGCTTGTAACACTTGCTATTGTAAAAAATGTTGCTTTCATT -GCCAAGTTTGTTTCACAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGAC -AGCGACGAAGAGCTCCTCAGGACCGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG -TAGTATATGTAATGCACTCTTTACAAATATTAGGAATAGTAGCATTAGTAGTAGCAGGAA -TAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAA -GGAAAATAGACAGGTTAATTGATAGAATAATAGAAAGAGCAGAAGACAGTGGCAATGAGA -GTGAAGGAGATCAGGAAGAATTGTCAGCACTTGTGGAGATGGGGCATCTTGCTCCTTGGG -ATATTAATGATTAGTAGTGCTGCAGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCT -GTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACA -GAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAA -GTAGTATTGGTAAATGTGACAGAAAATTTTAAYATGTGGACAAATAACATGGCAGAACAG -ATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACT -CCACTCTGTGTTACTTTAAATTGCACTGATTTGAGAAATACTACTAATACCAATAGTACC -GCCGAGGAAATGGAGGCGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCACCACAAGC -ATAAGGAATAAGTTGCAGAAAGAATATGCACTCTTTTATAAACTTGATATAGTACCAATA -AATAATGATAATACTAGCTATAGGTTGATAAGTTGTAACACCTCAGTCATTACCCAGGCC -TGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCG -ATTCTAAAGTGTAATGATAAGAAGTTCAGTGGAAACGGACCATGTAAAAATGTCAGCACA -GTACAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGCTAAATGGCAGT -CTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTTACAGACAATGCTAAAACCATA -ATAGTACAGCTGAAAGAACCTGTAGAAATTAATTGTACAAGACCTAACAACTATACAAGG -AAAAGAATAACTATGGGACCAGGGAGAGTATATTATACAACAGGAGAAATAATAGGAGAT -ATAAGACGAGCACATTGTAACATTAGTAGCACAAAATGGAATAACACTTTAGGACAGATA -GTTAAAAAATTAAAAGAACAATTTAACAATAATACAATAGTCTTTAAGAAATCCTCAGGA -GGGGACCCAGAAATTGTAATGCACAGTTTTATTTGTGGAGGGGAATTTTTCTTCTGTAAT -TCAACAAAACTGTTTAATAGTACTTGGAATAGCACTGAAGGAAATGACGATGGAGAGGAA -AGAAATATCACACTCCCATGCAGAATAAAACAAATTGTAAACATGTGGCAGGAAGTAGGA -AAAGCAATGTATGCCCCTCCCATCGGAGGACAAATTAGATGCACCTCAAATATTACAGGG -CTGCTATTAACAAGAGATGGAGGTAACCAAAATGGGACCAACGAGACTGAAATCTTCAGA -CCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAACTATATAAATATAAAGTAGTA -AAAATTGAACCATTAGGAGTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAA -AAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACT -ATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTG -CAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACA -GTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCTTAGCTGTGGAAAGATACCTAAAGGAT -CAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTGCTGTGCCT -TGGAATGCTAGTTGGAGTAATAAATCTCTGGATGAGATTTGGAATAACATGACCTGGATG -CAATGGGAAAGAGAAATTAACAATTACACAGGCTTAATATACACCTTAATTGAAGAATCG -CAAAACCAACAAGAAAAGAATGAACTAGATTTACTGCAATTAGATAAATGGGCAAGTTTG -TGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTA -GGAGGATTGGTAGGTTTAAGAATAATTTTTACTGTACTTTCTATAGTGAATAGGGTTAGG -CAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGG -CCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACACATCCGGACGCTTAGTG -GATGGATTCTTAGCAATTTTCTGGGTCGATCTGCGGAACCTGTGCCTCTTCAGCTACCAC -CGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGG -TGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAAT -AGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTT -ATAGAAGTATTACAAAGAGTTTATAGAGCTATTCTCAACATACCTACAAGAATCAGACAG -GGCTTGGAAAGGGCTTTGCTATAAGATGGGTAGCAAGTGGTCAAAAATGAGTGGGTGGCC -TGCTGTAAGGGAAAGAATGAGAAGAACTAAGCCAGCTGAGCCAGCAGCAGATGGAGTGGG -AGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTAC -CAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGGGGAGGTGGGTTTCCCAGT -CAAACCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTTAGCCACTT -TTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAACAAAGACAAGATATCCT -TGATCTGTGGGTCTACCATACACAAGGCTACTTCCCTGATTGGCAGAATTACACACCAGG -GCCAGGGGTCAGATTCCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCC -AGACAAGGTAGAAGAGGCCAATGAAGGGGAAAACAACTGCTTGTTACACCCTATGAGCCA -GCATGGGATGGAAGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTGGC -ATTGCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAGGACTGCTGACATCGAGC -TTTCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGG -GGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTC -TCTGGTTAGACCAGATCAGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTA -AGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGT ->Ref.28_BF.BR.99.BREPM12817.DQ085874 -ACTAGAGATCCCTCAGACACTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAA -CAGGGACCGAAAGCGAAAGTAGAACCAGAGGAGATCTCTCGACGCAGGACTCGGCTTGCT -GAAGTGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAATTTTTGACTAG -CGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAG -ATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATG -TAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACAT -CAGAAGGCTGTAGACAAATACTGGAACAGCTACAACCATCCCTTAAGACAGGATCAGAAG -AACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGG -TAAAAGATACCAAAGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAA -AGGCACAGCAAGCAGCAGCTGGCACAGGAAACAGCAACCAGGCCAGCCAAAATTACCCTA -TAGTGCAGAACCTTCAGGGGCAAATGGTACATCAACCCCTATCACCTAGAACTTTAAATG -CATGGGTAAAGGTGGTAGAAGAGAAGGGTTTTAACCCAGAAGTAATACCCATGTTTTCAG -CATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGTTAAATACAGTGGGGGGAC -ATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATGAGGAAGCTGCAGAATGGGACA -GATTACATCCAGTGCATGCAGGACCTATCCCACCAGGCCAGATGAGGGAACCTAGGGGAA -GTGATATAGCTGGAACAACTAGTAACCTTCAGGAACAAATACAATGGATGACAAGCAACC -CACCTGTCCCAGTGGGAGAAATCTATAAAAGATGGATCATCCTAGGATTAACTACACTAG -TAGGAATGTATAGCCCTGTCAGCATTTTGGACATAAAACAAGGGCCAAAAGACCTTTTTA -GAGACTATGTAGACCGGTTCTTTAAAACCCTAAGACTTGAGCAATGTACACAGGAAGTAA -AAGGTTGGATGACAGACACCTTGTTGGTTCCAAATGCGAACCCCGATTGTAAGACCTTTT -TAAAAGCTTTGGGCCCAGGGCCTTCACTTGAAGAACTGACGACCCCTGGTCCGGGAGTGG -GAGGACCTAGCCATAAGGCAAGAGTTTTGGCTGAGGCAATGAGCCAAACAACAAGTGCAG -CTGTAATGATGCAGAAAAGTAACTTTAAGGGCCAAAGAAGAATTGTTAAATGTTTTAATT -GTGGCAAAGAAGGACACATAGCCAAAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGA -AATGTGGAAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAG -GGAGAATTTGGCCTTCCCACAAGGGAAGGCCCGGGAATTTCCTTCAGAGCAGGCCAGAGC -CATCAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCCCTCAGA -AGCAGGAGCCGATAGACAAGGAACTGTATCCTTTGGCTTCCCTCAAATCACTCTTTGGCA -ACGACCAATAGTCACAGTAAAGATAGAGGGACAGCCAAAGGAAGCTCTATTAGATACAGG -AGCAGATGATACAGTATTAGAAGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGAT -AGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATAACATATCCATAGACATTTG -TGGACACAAGGCTACAGGTACAGTATTAGTAGGGCCTACACCTGTCAACATAATTGGAAG -AAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGT -ACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGA -AGAAAAAATAAAAGCATTAATAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTC -AAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAG -TACTAAATGGAGAAAATTAGTAGATTTCAGAGAGCTTAATAAGAAAACTCAAGACTTCTG -GGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGT -ACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGAATTCAGGAAGTATAC -TGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGT -GCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGCAGCATGACAAAAATCTT -AGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTA -TGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACA -TCTGTTAAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCT -TTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGA -AAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTAGGAAAATTGAATTGGGCAAG -TCAGATTTATCCAGGGATTAAAGTAAGGCAATTATGTAAGCTCCTTAGGGGAACCAAAGC -ACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGA -GATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGA -AATACAGAAGCAGGGGCAATGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA -TCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACAAATGATGTAAAACAACT -AACAGAAGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGATTCCTAA -ATTTAAATTGCCCATACAGAAAGAGACATGGGAAACATGGTGGATAGAGTATTGGCAAGC -CACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCA -GTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAG -GGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTTCC -CCTGACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATCCATTTAGCTTTGCAGGA -TTCGGGAGTAGAGGTAAACATAGTAACAGACTCCCAATATGCATTAGGAATCATCCAAGC -ACAACCAGATAAAAGTGAATCAGAGTTAGTTAGTCAAATAATAGAGCAGTTAATAAAAAA -GGAAAAAGTCTATTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGT -AGATAAATTAGTCAGTGCTGGAATCAGAAAGATACTATTTTTAGATGGAATAGATAAGGC -CCAAGAGGAACATGAGAAATATCACAGTAATTGGAGATCAATGGCTAGTGATTTTAACCT -GCCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGA -AGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTT -AGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGT -TATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATG -GCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCATCAGTAACACGGTTAAGGC -CGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCA -AGGAGTAATAGAATCTATGAATAAGGAATTAAAGAAAATTATAGGACAGGTAAGAGATCA -GGCTGAACATCTTAAAACAGCAGTGCAAATGGCAGTATTCATTCACAATTTTAAAAGAAA -AGGGGGGATTGGGGGGTACAGTGCAGGAGAAAGAATAGTAGACATAATAGCAACAGACAT -ACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAG -GGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGC -AGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCAT -TAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGA -TTAGAACATGGAACAGTTTAGTAAAACACCATGTGTATGTTTCAAGGAAAGCTAGGAATT -GGGTTTATAAACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCC -CACTAGGGGATGCTAAATTAGTAGTAATAACTTATTGGGGTCTGCATACAGGAGAAAGAG -ACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAA -TAGACCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAAT -CTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGACCTAGGTGTGATTATCAAGCAG -GACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAA -AGATAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCC -AGCAGATCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGA -ACTTAAGAATGAAGCTGTTAGACATTTTCCTAGACCATGGCTCCATGGCTTAGGACAACA -TATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCA -ACAACTGCTGTTCATTCATTACAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCG -ACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAACACCCAG -GGAGTCAGCCTAGGACTGCCTGTACCAAATGCTACTGTAAAAAGTGTTGCTTTCATTGCC -AAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGC -GACGAAGAACTCCTCAAGATAGTCAGACTCATCAAGTTTCTCTATCAAAACAGTAAGTAT -TACATGTAATGCACTCTTTACAAATATTAGCAATAGTAGCATTAGTAGTAGTAGCAATAA -TAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGGCAAAGAA -GAATAGACAGGATAATTGAGAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCG -AAGGGGACCAGGAAGAATTACTGGTGGAAATGGGGCATGATGCTCCTTGGGATGTTAATG -ATCTGTAATGCTGCAGAACAATCGTGGGTCACAGTCTACTATGGGGTGCCTGTGTGGAAA -GAAGCATCCACCACCTTATTTTGTGCATCAGATGCTAGCGCAGTTGACACAGAGGTACAT -AATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAGGAAATAGTATTG -GAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGATCAAATGCATGAA -GATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGT -GTTACTTTAAATTGCACTAATTGGAATACTACTAATAAGTTGAATGCTACTGATACCAAT -AGTAGTAGAATAGAGGAAGAAATGAAGGAAGAAATGAAAAACTGCTCTTTCAATGTCACC -TCAAGCATAGGAAATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTA -CCAATAAATAATGATAGTACAAGCTATACACTGATAAATTGTAACACCTCAGTCATTACA -CAAGCCTGTCCAAAGGTATCCTTTGAACCAATTCCCATACATTATTGTACCCCAGCTGGT -TTTGCGCTTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGACCATGTACAAATGTC -AGTACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAAT -GGCAGTCTAGCAGAAGAAGAAATAGTGATTAGATCTGAAAATTTCTCGAACAATGCTAAA -ACCATAATAGTACAGCTGAATGAAACTGTAGAAATTACTTGTGAAAGACCCAACAACAAT -ACAAGAAAAGGTATACATCTAGGATTTGGGAGAGCATTGTATGCAACAGGAGAAATAATA -GGAGATATAAGACAAGCATATTGTAACCTTAATAGAACAAAATGGGAAAACACTTTAAAA -CGGATAGTTACAAAATTAGGAGAACAATTTAAAAATCAAAATAAAACAATAACCTTCGAT -CTCTCCTCAGGAGGGGACCCAGAAATTATGCTGCACAGTTTCAGTTGTGGAGGGGAATTC -TTCTACTGTAATACAACACAGCTGTATAATAGTAATAGGAAGCAGGAGAATAATGGCACT -TGGAATAATAATGGGAGTAATACTAATGATAATATCACACTCCCATGCAGAATAAAACAA -ATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCACAGGAGCA -ATTAGATGTTTATCAAATATTACAGGGCTGTTATTAACAAGAGATGGTGGAACGAGAGCG -AATCTGTCCGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAA -TTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCAAAG -AGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAACAATAGGAGCTATGTTCCTTGGG -TTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAAGCC -AGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATTGAGGCG -CAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTG -GCTGTAGAAAGATACCTAAAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAA -CTCATCTGCACCACTGCTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTAGATGAA -ATTTGGAACAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGGCGTA -ATATACAATTTAATTGAAGAAGCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTG -CAATTGGATACGTGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTAT -ATAAAGATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTA -CTCTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGACCCGCCTC -CCAGCCCAGAGGGGACCCGACAGGCCCGGAGGAATCGAAGAAGAAGGTGGAGAGAGAGAC -AAGGACAGATCCATAAGATTAGTAGATGGATTCTTAGCAATCATCTGGGAAGACCTACGG -AGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTGACGAGGGTT -GTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGCTGGAATCTCCTGCAG -TATTGGAGTCAGGAACTAAGGAATAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCA -GTAGCTGAGGGAACAGATAGGATTATAGAAATAGTACAAATAATTTGTAGAGCTATTCTC -CACATACCTAGACGAATAAGACAGGGCTTAGAAAGAGCTTTGCAATAAGATGGGTGGCAA -GTGGTCAAAATGTAGCATGGGTGGGTGGACTGCTGTAAGGGAAAGAATGAGACGAACTGA -GCCAGCAACTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAAACA -TGGAGCAATTACAAGTAGCAATACAGCAGCTACTAATGCTGACTGTGCCTGGCTAGAAGC -ACAAGAGGAGGAGGAAGTGGGCTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGAC -TTACAAAGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCT -AATTTACTCCCAGAAAAGACAAGAGATCCTTGATCTGTGGGTCTACAACACACAAGGATA -CTTCCCTGATTGGCAGAACTACACAAAAGGGCCAGGGATCAGATATCCACTGACCTTCGG -ATGGTGCTTCAAGCTAGTGCCAGTTGATCCAGAACAGGTAGAAAAGGCCAATGAAGGAGA -GAACAACAGCCTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGT -GTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCGCCACGTAGCCAGAGAGCTGCATCC -GGAGTATTACAAAGACTGCTGACATCGAGTTTTCTACATGGGACTTTCCACTGGGGACTT -TCCAGGGAGG diff --git a/tests/expected-results-small-csv/defects.csv b/tests/expected-results-small-csv/defects.csv index f5f2fd6..6e1bdce 100644 --- a/tests/expected-results-small-csv/defects.csv +++ b/tests/expected-results-small-csv/defects.csv @@ -1,86 +1 @@ qseqid,error,message,region -KX505501.1,Frameshift,ORF 'pol' at 1629-1746 contains out of frame indels that impact 1950 positions.,pol -KX505501.1,Deletion,ORF 'pol' exceeds maximum deletion tolerance. Contains 2892 deletions with max tolerance of 93 deletions.,pol -KX505501.1,SequenceDivergence,ORF 'pol' exceeds maximum distance tolerance. It is 2.13586 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.88345.,pol -KX505501.1,MutatedStopCodon,ORF 'pol' has a mutated stop codon: 'T--'.,pol -KX505501.1,Frameshift,ORF 'env' at 1747-1746 contains out of frame indels that impact 1714 positions.,env -KX505501.1,Deletion,ORF 'env' exceeds maximum deletion tolerance. Contains 2346 deletions with max tolerance of 54 deletions.,env -KX505501.1,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 2.11186 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -KX505501.1,Frameshift,ORF 'vif' at 1747-1746 contains out of frame indels that impact 386 positions.,vif -KX505501.1,Deletion,ORF 'vif' exceeds maximum deletion tolerance. Contains 354 deletions with max tolerance of 12 deletions.,vif -KX505501.1,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 2.04883 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -KX505501.1,Frameshift,ORF 'vpr' at 1747-1746 contains out of frame indels that impact 194 positions.,vpr -KX505501.1,Deletion,ORF 'vpr' exceeds maximum deletion tolerance. Contains 66 deletions with max tolerance of 6 deletions.,vpr -KX505501.1,SequenceDivergence,ORF 'vpr' exceeds maximum distance tolerance. It is 2.00365 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.,vpr -KX505501.1,Frameshift,ORF 'tat_exon1' at 1747-1746 contains out of frame indels that impact 144 positions.,tat_exon1 -KX505501.1,Deletion,ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 69 deletions with max tolerance of 0 deletions.,tat_exon1 -KX505501.1,SequenceDivergence,ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.,tat_exon1 -KX505501.1,Frameshift,ORF 'rev_exon1' at 1747-1746 contains out of frame indels that impact 51 positions.,rev_exon1 -KX505501.1,Insertion,ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 69 insertions with max tolerance of 0 insertions.,rev_exon1 -KX505501.1,SequenceDivergence,ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.,rev_exon1 -KX505501.1,Frameshift,ORF 'vpu' at 1747-1746 contains out of frame indels that impact 166 positions.,vpu -KX505501.1,Deletion,ORF 'vpu' exceeds maximum deletion tolerance. Contains 24 deletions with max tolerance of 6 deletions.,vpu -KX505501.1,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 2.044 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -KX505501.1,Frameshift,ORF 'tat_exon2' at 1747-1746 contains out of frame indels that impact 62 positions.,tat_exon2 -KX505501.1,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 15 deletions with max tolerance of 0 deletions.,tat_exon2 -KX505501.1,SequenceDivergence,ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.,tat_exon2 -KX505501.1,Frameshift,ORF 'rev_exon2' at 1747-1746 contains out of frame indels that impact 184 positions.,rev_exon2 -KX505501.1,Deletion,ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 198 deletions with max tolerance of 7 deletions.,rev_exon2 -KX505501.1,SequenceDivergence,ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.,rev_exon2 -KX505501.1,Frameshift,ORF 'nef' at 1747-1778 contains out of frame indels that impact 425 positions.,nef -KX505501.1,Deletion,ORF 'nef' exceeds maximum deletion tolerance. Contains 396 deletions with max tolerance of 48 deletions.,nef -KX505501.1,SequenceDivergence,ORF 'nef' exceeds maximum distance tolerance. It is 2.0371 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.,nef -KX505501.1,MutatedStartCodon,ORF 'nef' has a mutated start codon: 'AGA'.,nef -KX505501.1,MutatedStopCodon,ORF 'nef' has a mutated stop codon: 'GAC'.,nef -KX505501.1,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions., -KX505501.1,LongDeletion,Query sequence contains a very large deletion., -KX505501.1,Scramble,Sequence is plus-scrambled., -MN691959,Frameshift,ORF 'vpu' at 5911-6156 contains out of frame indels that impact 122 positions.,vpu -MN691959,InternalStop,ORF 'vpu' at 5911-6156 contains an internal stop codon at 5974.,vpu -MN691959,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 1.70488 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -MN691959,Frameshift,ORF 'tat_exon2' at 8238-8330 contains out of frame indels that impact 32 positions.,tat_exon2 -MN691959,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.,tat_exon2 -MN692074,Frameshift,ORF 'pol' at 2085-4082 contains out of frame indels that impact 676 positions.,pol -MN692074,Deletion,ORF 'pol' exceeds maximum deletion tolerance. Contains 981 deletions with max tolerance of 93 deletions.,pol -MN692074,MutatedStopCodon,ORF 'pol' has a mutated stop codon: 'GAT'.,pol -MN692074,Frameshift,ORF 'env' at 4083-4082 contains out of frame indels that impact 1714 positions.,env -MN692074,Deletion,ORF 'env' exceeds maximum deletion tolerance. Contains 2073 deletions with max tolerance of 54 deletions.,env -MN692074,SequenceDivergence,ORF 'env' exceeds maximum distance tolerance. It is 2.05571 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.,env -MN692074,Frameshift,ORF 'vif' at 4083-4082 contains out of frame indels that impact 386 positions.,vif -MN692074,Deletion,ORF 'vif' exceeds maximum deletion tolerance. Contains 81 deletions with max tolerance of 12 deletions.,vif -MN692074,SequenceDivergence,ORF 'vif' exceeds maximum distance tolerance. It is 1.99787 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.,vif -MN692074,Frameshift,ORF 'vpr' at 4083-4082 contains out of frame indels that impact 194 positions.,vpr -MN692074,Insertion,ORF 'vpr' exceeds maximum insertion tolerance. Contains 207 insertions with max tolerance of 6 insertions.,vpr -MN692074,SequenceDivergence,ORF 'vpr' exceeds maximum distance tolerance. It is 1.9951 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.,vpr -MN692074,Frameshift,ORF 'tat_exon1' at 4083-4082 contains out of frame indels that impact 144 positions.,tat_exon1 -MN692074,Insertion,ORF 'tat_exon1' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 0 insertions.,tat_exon1 -MN692074,SequenceDivergence,ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.,tat_exon1 -MN692074,Frameshift,ORF 'rev_exon1' at 4083-4082 contains out of frame indels that impact 51 positions.,rev_exon1 -MN692074,Insertion,ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 387 insertions with max tolerance of 0 insertions.,rev_exon1 -MN692074,SequenceDivergence,ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.,rev_exon1 -MN692074,Frameshift,ORF 'vpu' at 4083-4082 contains out of frame indels that impact 166 positions.,vpu -MN692074,Insertion,ORF 'vpu' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 24 insertions.,vpu -MN692074,SequenceDivergence,ORF 'vpu' exceeds maximum distance tolerance. It is 2.00052 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.,vpu -MN692074,Frameshift,ORF 'tat_exon2' at 4083-4082 contains out of frame indels that impact 62 positions.,tat_exon2 -MN692074,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 60 deletions with max tolerance of 0 deletions.,tat_exon2 -MN692074,SequenceDivergence,ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.,tat_exon2 -MN692074,Frameshift,ORF 'rev_exon2' at 4083-4082 contains out of frame indels that impact 184 positions.,rev_exon2 -MN692074,Deletion,ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 243 deletions with max tolerance of 7 deletions.,rev_exon2 -MN692074,SequenceDivergence,ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.,rev_exon2 -MN692074,Frameshift,ORF 'nef' at 4083-4086 contains out of frame indels that impact 412 positions.,nef -MN692074,Deletion,ORF 'nef' exceeds maximum deletion tolerance. Contains 123 deletions with max tolerance of 48 deletions.,nef -MN692074,SequenceDivergence,ORF 'nef' exceeds maximum distance tolerance. It is 2.00372 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.,nef -MN692074,MutatedStartCodon,ORF 'nef' has a mutated start codon: 'ACC'.,nef -MN692074,MutatedStopCodon,ORF 'nef' has a mutated stop codon: 'CAG'.,nef -MN692074,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions., -MN692074,LongDeletion,Query sequence contains a very large deletion., -MN090335,Frameshift,ORF 'gag' at 482-1665 contains out of frame indels that impact 1230 positions.,gag -MN090335,InternalStop,ORF 'gag' at 482-1665 contains an internal stop codon at 683.,gag -MN090335,MutatedStartCodon,ORF 'gag' has a mutated start codon: 'ATA'.,gag -MN090335,MutatedStopCodon,ORF 'gag' has a mutated stop codon: 'AAA'.,gag -MN090335,Frameshift,ORF 'tat_exon2' at 7740-7832 contains out of frame indels that impact 32 positions.,tat_exon2 -MN090335,Deletion,ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.,tat_exon2 -MN090335,PackagingSignalDeletion,Query sequence exceeds maximum deletion tolerance in PSI. Contains 42 deletions with max tolerance of 10 deletions., -MN090335,MajorSpliceDonorSiteMutated,Query sequence has a mutated splice donor site: AT. The context is TTAACTGCGAAT-----CGTTC., -MN090335,Scramble,Sequence is minus-scrambled., -MN090335,InternalInversion,Sequence contains an internal inversion., diff --git a/tests/expected-results-small-csv/holistic.csv b/tests/expected-results-small-csv/holistic.csv index 539734b..1c67e33 100644 --- a/tests/expected-results-small-csv/holistic.csv +++ b/tests/expected-results-small-csv/holistic.csv @@ -1,6 +1 @@ qseqid,intact,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs,is_reverse_complement -KX505501.1,False,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,1997,0.2498456472525211,1.2158237356034052,0.17666166916541728,789,8793,4,False -MN691959,False,9493,0.19667690182893238,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9493,1.0817040543321672,1.1086063415148004,1.0,789,8793,3,False -MN692074,False,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,4178,0.5042189750977567,1.1728099569171853,0.411544227886057,789,8793,4,False -MN692145,True,9689,0.1672411051048176,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9689,1.130479522535501,1.1271545051088863,1.0,789,8793,3,False -MN090335,False,9069,0.1771850809736527,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE,9069,0.9842560197571517,1.0603153600176425,1.000374812593703,789,8793,3,False diff --git a/tests/expected-results-small-csv/regions.csv b/tests/expected-results-small-csv/regions.csv index 98e8e96..ef28202 100644 --- a/tests/expected-results-small-csv/regions.csv +++ b/tests/expected-results-small-csv/regions.csv @@ -1,56 +1 @@ qseqid,region,start,end,orientation,distance,indel_impact,protein,nucleotides,subtype_start,subtype_end,subtype_aminoacids,subtype_nucleotides -KX505501.1,gag,336,1745,forward,0.41298449612403343,69,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -KX505501.1,pol,1628,1745,forward,2.1358565737051802,1950,FCRENLAFPQGKAGEFPSEQTRANSPTSRELQVWGRDTN,TTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -KX505501.1,env,1746,1745,forward,2.1118604651162807,1714,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -KX505501.1,vif,1746,1745,forward,2.0488262910798123,386,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -KX505501.1,vpr,1746,1745,forward,2.0036496350364965,194,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -KX505501.1,tat_exon1,1746,1745,forward,2.218055555555555,144,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT,,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -KX505501.1,rev_exon1,1746,1745,forward,2.2499999999999996,51,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT,,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -KX505501.1,vpu,1746,1745,forward,2.044,166,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -KX505501.1,tat_exon2,1746,1745,forward,2.2419354838709675,62,RCIRSTTRTADTELFTRDFPLGTFQ,,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -KX505501.1,rev_exon2,1746,1745,forward,2.214130434782609,184,RCIRSTTRTADTELFTRDFPLGTFQ,,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -KX505501.1,nef,1746,1777,forward,2.03710407239819,425,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,AGATGCATCCGGAGTACTACAAGGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN691959,gag,639,2141,forward,0.05400000000000005,0,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN691959,pol,1934,4945,forward,0.03585657370517925,0,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN691959,vif,4890,5468,forward,0.0625,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN691959,vpr,5408,5698,forward,0.0625,0,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN691959,tat_exon1,5679,5893,forward,0.25,0,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN691959,rev_exon1,5818,5893,forward,0.46153846153846145,0,MAGRSGDSDEDLLKTVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN691959,vpu,5910,6155,forward,1.704878048780488,122,MQPIQIAIVALVVAIIIAIVV,ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN691959,env,6070,8655,forward,0.10139372822299642,0,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL,ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN691959,tat_exon2,8237,8329,forward,0.19354838709677424,32,RPTSQTRGDPTGPKE,AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN691959,rev_exon2,8238,8513,forward,0.16304347826086962,0,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE,GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN691959,nef,8657,9277,forward,0.043689320388349495,0,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN692074,gag,789,2291,forward,0.14990059642147102,0,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692074,pol,2084,4081,forward,0.8050695825049854,676,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGAT,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN692074,env,4082,4081,forward,2.055707762557078,1714,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN692074,vif,4082,4081,forward,1.997872340425532,386,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN692074,vpr,4082,4081,forward,1.9950980392156863,194,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN692074,tat_exon1,4082,4081,forward,2.218055555555555,144,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD,,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692074,rev_exon1,4082,4081,forward,2.2499999999999996,51,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD,,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692074,vpu,4082,4081,forward,2.0005208333333333,166,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN692074,tat_exon2,4082,4081,forward,2.2419354838709675,62,TQWRALRCCI,,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN692074,rev_exon2,4082,4081,forward,2.214130434782609,184,TQWRALRCCI,,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN692074,nef,4082,4085,forward,2.0037162162162163,412,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,ACCC,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN692145,gag,775,2280,forward,0.16267465069860276,0,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692145,pol,2070,5084,forward,0.10796019900497522,0,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN692145,vif,5029,5607,forward,0.21875,0,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN692145,vpr,5547,5837,forward,0.34375,0,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN692145,tat_exon1,5818,6032,forward,0.25,0,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKX,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692145,rev_exon1,5957,6032,forward,0.34615384615384626,0,MAGRSGDSDEELLKTVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN692145,vpu,6049,6297,forward,0.6733333333333333,0,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL,ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN692145,env,6212,8782,forward,0.5647651006711409,0,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN692145,tat_exon2,8364,8456,forward,0.3870967741935485,0,RPASQPRGDPTGPKESKKKVERETETDPLH,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN692145,rev_exon2,8365,8640,forward,0.26086956521739135,0,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN692145,nef,8784,9386,forward,0.40765550239234427,0,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN090335,gag,481,1664,forward,0.9171874999999998,1230,MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ,ATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA,789,2291,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN090335,pol,1427,4468,forward,0.23952802359881997,0,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG,2084,5095,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN090335,vif,4413,4991,forward,0.34375,0,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG,5040,5618,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN090335,vpr,4931,5221,forward,0.38144329896907214,0,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG,5558,5848,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MN090335,tat_exon1,5202,5416,forward,0.41666666666666674,0,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKX,ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA,5829,6043,MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN090335,rev_exon1,5341,5416,forward,0.5769230769230769,0,MAGRSGDRDEDLLKTVRLIKFLYQSX,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA,5968,6043,MAGRSGDSDEELIRTVRLIKLLYQSX,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA -MN090335,vpu,5433,5681,forward,0.6837209302325582,0,MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL,ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG,6060,6308,MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*,ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN090335,env,5596,8157,forward,0.6589887640449441,31,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ,ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA,6223,8793,MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN090335,tat_exon2,7739,7831,forward,0.4838709677419355,32,RPSSQPRGDQTGPKE,AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAG,8375,8467,RPTSQPRGDPTGPKEPKKKVERETETDPFD*,AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG -MN090335,rev_exon2,7740,8015,forward,0.4565217391304348,0,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE,GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG,8376,8651,DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*,GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG -MN090335,nef,8159,8812,forward,0.5935483870967742,0,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA,8795,9415,MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA diff --git a/tests/expected-results-small/defects.json b/tests/expected-results-small/defects.json index 38b6912..9e26dfe 100644 --- a/tests/expected-results-small/defects.json +++ b/tests/expected-results-small/defects.json @@ -1,521 +1 @@ -{ - "KX505501.1": [ - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'pol' at 1629-1746 contains out of frame indels that impact 1950 positions.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'pol' exceeds maximum deletion tolerance. Contains 2892 deletions with max tolerance of 93 deletions.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'pol' exceeds maximum distance tolerance. It is 2.13586 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.88345.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'T--'.", - "region": "pol" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'env' at 1747-1746 contains out of frame indels that impact 1714 positions.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'env' exceeds maximum deletion tolerance. Contains 2346 deletions with max tolerance of 54 deletions.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 2.11186 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vif' at 1747-1746 contains out of frame indels that impact 386 positions.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vif' exceeds maximum deletion tolerance. Contains 354 deletions with max tolerance of 12 deletions.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 2.04883 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vpr' at 1747-1746 contains out of frame indels that impact 194 positions.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vpr' exceeds maximum deletion tolerance. Contains 66 deletions with max tolerance of 6 deletions.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 2.00365 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 1747-1746 contains out of frame indels that impact 144 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'tat_exon1' exceeds maximum deletion tolerance. Contains 69 deletions with max tolerance of 0 deletions.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.", - "region": "tat_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'rev_exon1' at 1747-1746 contains out of frame indels that impact 51 positions.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Insertion", - "message": "ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 69 insertions with max tolerance of 0 insertions.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.", - "region": "rev_exon1" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'vpu' at 1747-1746 contains out of frame indels that impact 166 positions.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'vpu' exceeds maximum deletion tolerance. Contains 24 deletions with max tolerance of 6 deletions.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 2.044 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 1747-1746 contains out of frame indels that impact 62 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 15 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.", - "region": "tat_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 1747-1746 contains out of frame indels that impact 184 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 198 deletions with max tolerance of 7 deletions.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "KX505501.1", - "error": "Frameshift", - "message": "ORF 'nef' at 1747-1778 contains out of frame indels that impact 425 positions.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 396 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 2.0371 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'AGA'.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'GAC'.", - "region": "nef" - }, - { - "qseqid": "KX505501.1", - "error": "RevResponseElementDeletion", - "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions.", - "region": null - }, - { - "qseqid": "KX505501.1", - "error": "LongDeletion", - "message": "Query sequence contains a very large deletion.", - "region": null - }, - { - "qseqid": "KX505501.1", - "error": "Scramble", - "message": "Sequence is plus-scrambled.", - "region": null - } - ], - "MN691959": [ - { - "qseqid": "MN691959", - "error": "Frameshift", - "message": "ORF 'vpu' at 5911-6156 contains out of frame indels that impact 122 positions.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "InternalStop", - "message": "ORF 'vpu' at 5911-6156 contains an internal stop codon at 5974.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 1.70488 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MN691959", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 8238-8330 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN691959", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - } - ], - "MN692074": [ - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'pol' at 2085-4082 contains out of frame indels that impact 676 positions.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'pol' exceeds maximum deletion tolerance. Contains 981 deletions with max tolerance of 93 deletions.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "MutatedStopCodon", - "message": "ORF 'pol' has a mutated stop codon: 'GAT'.", - "region": "pol" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'env' at 4083-4082 contains out of frame indels that impact 1714 positions.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'env' exceeds maximum deletion tolerance. Contains 2073 deletions with max tolerance of 54 deletions.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'env' exceeds maximum distance tolerance. It is 2.05571 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.35595.", - "region": "env" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vif' at 4083-4082 contains out of frame indels that impact 386 positions.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'vif' exceeds maximum deletion tolerance. Contains 81 deletions with max tolerance of 12 deletions.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vif' exceeds maximum distance tolerance. It is 1.99787 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.78802.", - "region": "vif" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vpr' at 4083-4082 contains out of frame indels that impact 194 positions.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'vpr' exceeds maximum insertion tolerance. Contains 207 insertions with max tolerance of 6 insertions.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vpr' exceeds maximum distance tolerance. It is 1.9951 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.58981.", - "region": "vpr" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'tat_exon1' at 4083-4082 contains out of frame indels that impact 144 positions.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'tat_exon1' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 0 insertions.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon1' exceeds maximum distance tolerance. It is 2.21806 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.85696.", - "region": "tat_exon1" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'rev_exon1' at 4083-4082 contains out of frame indels that impact 51 positions.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'rev_exon1' exceeds maximum insertion tolerance. Contains 387 insertions with max tolerance of 0 insertions.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon1' exceeds maximum distance tolerance. It is 2.25 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.92308.", - "region": "rev_exon1" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'vpu' at 4083-4082 contains out of frame indels that impact 166 positions.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "Insertion", - "message": "ORF 'vpu' exceeds maximum insertion tolerance. Contains 249 insertions with max tolerance of 24 insertions.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'vpu' exceeds maximum distance tolerance. It is 2.00052 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.07949.", - "region": "vpu" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 4083-4082 contains out of frame indels that impact 62 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 60 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'tat_exon2' exceeds maximum distance tolerance. It is 2.24194 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.18333.", - "region": "tat_exon2" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'rev_exon2' at 4083-4082 contains out of frame indels that impact 184 positions.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'rev_exon2' exceeds maximum deletion tolerance. Contains 243 deletions with max tolerance of 7 deletions.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'rev_exon2' exceeds maximum distance tolerance. It is 2.21413 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 0.75773.", - "region": "rev_exon2" - }, - { - "qseqid": "MN692074", - "error": "Frameshift", - "message": "ORF 'nef' at 4083-4086 contains out of frame indels that impact 412 positions.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "Deletion", - "message": "ORF 'nef' exceeds maximum deletion tolerance. Contains 123 deletions with max tolerance of 48 deletions.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "SequenceDivergence", - "message": "ORF 'nef' exceeds maximum distance tolerance. It is 2.00372 units of distance away from its reference ORF's aminoacid sequence with max tolerance of 1.34148.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "MutatedStartCodon", - "message": "ORF 'nef' has a mutated start codon: 'ACC'.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "MutatedStopCodon", - "message": "ORF 'nef' has a mutated stop codon: 'CAG'.", - "region": "nef" - }, - { - "qseqid": "MN692074", - "error": "RevResponseElementDeletion", - "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions.", - "region": null - }, - { - "qseqid": "MN692074", - "error": "LongDeletion", - "message": "Query sequence contains a very large deletion.", - "region": null - } - ], - "MN692145": [], - "MN090335": [ - { - "qseqid": "MN090335", - "error": "Frameshift", - "message": "ORF 'gag' at 482-1665 contains out of frame indels that impact 1230 positions.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "InternalStop", - "message": "ORF 'gag' at 482-1665 contains an internal stop codon at 683.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "MutatedStartCodon", - "message": "ORF 'gag' has a mutated start codon: 'ATA'.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "MutatedStopCodon", - "message": "ORF 'gag' has a mutated stop codon: 'AAA'.", - "region": "gag" - }, - { - "qseqid": "MN090335", - "error": "Frameshift", - "message": "ORF 'tat_exon2' at 7740-7832 contains out of frame indels that impact 32 positions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN090335", - "error": "Deletion", - "message": "ORF 'tat_exon2' exceeds maximum deletion tolerance. Contains 45 deletions with max tolerance of 0 deletions.", - "region": "tat_exon2" - }, - { - "qseqid": "MN090335", - "error": "PackagingSignalDeletion", - "message": "Query sequence exceeds maximum deletion tolerance in PSI. Contains 42 deletions with max tolerance of 10 deletions.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "MajorSpliceDonorSiteMutated", - "message": "Query sequence has a mutated splice donor site: AT. The context is TTAACTGCGAAT-----CGTTC.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "Scramble", - "message": "Sequence is minus-scrambled.", - "region": null - }, - { - "qseqid": "MN090335", - "error": "InternalInversion", - "message": "Sequence contains an internal inversion.", - "region": null - } - ] -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-small/holistic.json b/tests/expected-results-small/holistic.json index 239c6c3..9e26dfe 100644 --- a/tests/expected-results-small/holistic.json +++ b/tests/expected-results-small/holistic.json @@ -1,72 +1 @@ -{ - "KX505501.1": { - "intact": false, - "qlen": 1997, - "hypermutation_probablility": 0.7087072014754221, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 1997, - "blast_sseq_coverage": 0.2498456472525211, - "blast_qseq_coverage": 1.2158237356034052, - "blast_sseq_orfs_coverage": 0.17666166916541728, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MN691959": { - "intact": false, - "qlen": 9493, - "hypermutation_probablility": 0.19667690182893238, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9493, - "blast_sseq_coverage": 1.0817040543321672, - "blast_qseq_coverage": 1.1086063415148004, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN692074": { - "intact": false, - "qlen": 4178, - "hypermutation_probablility": 0.36378645339477633, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 4178, - "blast_sseq_coverage": 0.5042189750977567, - "blast_qseq_coverage": 1.1728099569171853, - "blast_sseq_orfs_coverage": 0.411544227886057, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 4, - "is_reverse_complement": false - }, - "MN692145": { - "intact": true, - "qlen": 9689, - "hypermutation_probablility": 0.1672411051048176, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9689, - "blast_sseq_coverage": 1.130479522535501, - "blast_qseq_coverage": 1.1271545051088863, - "blast_sseq_orfs_coverage": 1.0, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - }, - "MN090335": { - "intact": false, - "qlen": 9069, - "hypermutation_probablility": 0.1771850809736527, - "inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.CfE", - "blast_matched_qlen": 9069, - "blast_sseq_coverage": 0.9842560197571517, - "blast_qseq_coverage": 1.0603153600176425, - "blast_sseq_orfs_coverage": 1.000374812593703, - "orfs_start": 789, - "orfs_end": 8793, - "blast_n_conseqs": 3, - "is_reverse_complement": false - } -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/expected-results-small/regions.json b/tests/expected-results-small/regions.json index 59d7a2f..9e26dfe 100644 --- a/tests/expected-results-small/regions.json +++ b/tests/expected-results-small/regions.json @@ -1,782 +1 @@ -{ - "KX505501.1": [ - { - "region": "gag", - "start": 336, - "end": 1745, - "orientation": "forward", - "distance": 0.41298449612403343, - "indel_impact": 69, - "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1628, - "end": 1745, - "orientation": "forward", - "distance": 2.1358565737051802, - "indel_impact": 1950, - "protein": "FCRENLAFPQGKAGEFPSEQTRANSPTSRELQVWGRDTN", - "nucleotides": "TTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACT", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "env", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.1118604651162807, - "indel_impact": 1714, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "vif", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.0488262910798123, - "indel_impact": 386, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.0036496350364965, - "indel_impact": 194, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.218055555555555, - "indel_impact": 144, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT", - "nucleotides": "", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.2499999999999996, - "indel_impact": 51, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPT", - "nucleotides": "", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.044, - "indel_impact": 166, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "tat_exon2", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.2419354838709675, - "indel_impact": 62, - "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 1746, - "end": 1745, - "orientation": "forward", - "distance": 2.214130434782609, - "indel_impact": 184, - "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 1746, - "end": 1777, - "orientation": "forward", - "distance": 2.03710407239819, - "indel_impact": 425, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", - "nucleotides": "AGATGCATCCGGAGTACTACAAGGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN691959": [ - { - "region": "gag", - "start": 639, - "end": 2141, - "orientation": "forward", - "distance": 0.05400000000000005, - "indel_impact": 0, - "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1934, - "end": 4945, - "orientation": "forward", - "distance": 0.03585657370517925, - "indel_impact": 0, - "protein": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4890, - "end": 5468, - "orientation": "forward", - "distance": 0.0625, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5408, - "end": 5698, - "orientation": "forward", - "distance": 0.0625, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5679, - "end": 5893, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKX", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5818, - "end": 5893, - "orientation": "forward", - "distance": 0.46153846153846145, - "indel_impact": 0, - "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5910, - "end": 6155, - "orientation": "forward", - "distance": 1.704878048780488, - "indel_impact": 122, - "protein": "MQPIQIAIVALVVAIIIAIVV", - "nucleotides": "ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6070, - "end": 8655, - "orientation": "forward", - "distance": 0.10139372822299642, - "indel_impact": 0, - "protein": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL", - "nucleotides": "ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8237, - "end": 8329, - "orientation": "forward", - "distance": 0.19354838709677424, - "indel_impact": 32, - "protein": "RPTSQTRGDPTGPKE", - "nucleotides": "AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8238, - "end": 8513, - "orientation": "forward", - "distance": 0.16304347826086962, - "indel_impact": 0, - "protein": "DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE", - "nucleotides": "GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8657, - "end": 9277, - "orientation": "forward", - "distance": 0.043689320388349495, - "indel_impact": 0, - "protein": "MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN692074": [ - { - "region": "gag", - "start": 789, - "end": 2291, - "orientation": "forward", - "distance": 0.14990059642147102, - "indel_impact": 0, - "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2084, - "end": 4081, - "orientation": "forward", - "distance": 0.8050695825049854, - "indel_impact": 676, - "protein": "FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGAT", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "env", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.055707762557078, - "indel_impact": 1714, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "vif", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 1.997872340425532, - "indel_impact": 386, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 1.9950980392156863, - "indel_impact": 194, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.218055555555555, - "indel_impact": 144, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD", - "nucleotides": "", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.2499999999999996, - "indel_impact": 51, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPD", - "nucleotides": "", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.0005208333333333, - "indel_impact": 166, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "tat_exon2", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.2419354838709675, - "indel_impact": 62, - "protein": "TQWRALRCCI", - "nucleotides": "", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 4082, - "end": 4081, - "orientation": "forward", - "distance": 2.214130434782609, - "indel_impact": 184, - "protein": "TQWRALRCCI", - "nucleotides": "", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 4082, - "end": 4085, - "orientation": "forward", - "distance": 2.0037162162162163, - "indel_impact": 412, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", - "nucleotides": "ACCC", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN692145": [ - { - "region": "gag", - "start": 775, - "end": 2280, - "orientation": "forward", - "distance": 0.16267465069860276, - "indel_impact": 0, - "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", - "nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 2070, - "end": 5084, - "orientation": "forward", - "distance": 0.10796019900497522, - "indel_impact": 0, - "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 5029, - "end": 5607, - "orientation": "forward", - "distance": 0.21875, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 5547, - "end": 5837, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5818, - "end": 6032, - "orientation": "forward", - "distance": 0.25, - "indel_impact": 0, - "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKX", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5957, - "end": 6032, - "orientation": "forward", - "distance": 0.34615384615384626, - "indel_impact": 0, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 6049, - "end": 6297, - "orientation": "forward", - "distance": 0.6733333333333333, - "indel_impact": 0, - "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", - "nucleotides": "ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 6212, - "end": 8782, - "orientation": "forward", - "distance": 0.5647651006711409, - "indel_impact": 0, - "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", - "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 8364, - "end": 8456, - "orientation": "forward", - "distance": 0.3870967741935485, - "indel_impact": 0, - "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 8365, - "end": 8640, - "orientation": "forward", - "distance": 0.26086956521739135, - "indel_impact": 0, - "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", - "nucleotides": "GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8784, - "end": 9386, - "orientation": "forward", - "distance": 0.40765550239234427, - "indel_impact": 0, - "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", - "nucleotides": "ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ], - "MN090335": [ - { - "region": "gag", - "start": 481, - "end": 1664, - "orientation": "forward", - "distance": 0.9171874999999998, - "indel_impact": 1230, - "protein": "MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ", - "nucleotides": "ATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA", - "subtype_start": 789, - "subtype_end": 2291, - "subtype_aminoacids": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*", - "subtype_nucleotides": "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA" - }, - { - "region": "pol", - "start": 1427, - "end": 4468, - "orientation": "forward", - "distance": 0.23952802359881997, - "indel_impact": 0, - "protein": "FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", - "nucleotides": "TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG", - "subtype_start": 2084, - "subtype_end": 5095, - "subtype_aminoacids": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*", - "subtype_nucleotides": "TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG" - }, - { - "region": "vif", - "start": 4413, - "end": 4991, - "orientation": "forward", - "distance": 0.34375, - "indel_impact": 0, - "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH", - "nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG", - "subtype_start": 5040, - "subtype_end": 5618, - "subtype_aminoacids": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYESPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*", - "subtype_nucleotides": "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG" - }, - { - "region": "vpr", - "start": 4931, - "end": 5221, - "orientation": "forward", - "distance": 0.38144329896907214, - "indel_impact": 0, - "protein": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG", - "subtype_start": 5558, - "subtype_end": 5848, - "subtype_aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTRQRRARNGASRS*", - "subtype_nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" - }, - { - "region": "tat_exon1", - "start": 5202, - "end": 5416, - "orientation": "forward", - "distance": 0.41666666666666674, - "indel_impact": 0, - "protein": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKX", - "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5829, - "subtype_end": 6043, - "subtype_aminoacids": "MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQNSQTHQASLSKX", - "subtype_nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "rev_exon1", - "start": 5341, - "end": 5416, - "orientation": "forward", - "distance": 0.5769230769230769, - "indel_impact": 0, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSX", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCA", - "subtype_start": 5968, - "subtype_end": 6043, - "subtype_aminoacids": "MAGRSGDSDEELIRTVRLIKLLYQSX", - "subtype_nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCA" - }, - { - "region": "vpu", - "start": 5433, - "end": 5681, - "orientation": "forward", - "distance": 0.6837209302325582, - "indel_impact": 0, - "protein": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL", - "nucleotides": "ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG", - "subtype_start": 6060, - "subtype_end": 6308, - "subtype_aminoacids": "MQPIPIVAIVALVVAIIIAIVVWSIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEISALVEMGVEMGHHAPWDVDDL*", - "subtype_nucleotides": "ATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" - }, - { - "region": "env", - "start": 5596, - "end": 8157, - "orientation": "forward", - "distance": 0.6589887640449441, - "indel_impact": 31, - "protein": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ", - "nucleotides": "ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA", - "subtype_start": 6223, - "subtype_end": 8793, - "subtype_aminoacids": "MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGTEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL*", - "subtype_nucleotides": "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" - }, - { - "region": "tat_exon2", - "start": 7739, - "end": 7831, - "orientation": "forward", - "distance": 0.4838709677419355, - "indel_impact": 32, - "protein": "RPSSQPRGDQTGPKE", - "nucleotides": "AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAG", - "subtype_start": 8375, - "subtype_end": 8467, - "subtype_aminoacids": "RPTSQPRGDPTGPKEPKKKVERETETDPFD*", - "subtype_nucleotides": "AGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAG" - }, - { - "region": "rev_exon2", - "start": 7740, - "end": 8015, - "orientation": "forward", - "distance": 0.4565217391304348, - "indel_impact": 0, - "protein": "DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE", - "nucleotides": "GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG", - "subtype_start": 8376, - "subtype_end": 8651, - "subtype_aminoacids": "DPPPNPEGTRQARRNRRRRWRERQRQIHSISERILGTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE*", - "subtype_nucleotides": "GACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAG" - }, - { - "region": "nef", - "start": 8159, - "end": 8812, - "orientation": "forward", - "distance": 0.5935483870967742, - "indel_impact": 0, - "protein": "MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", - "nucleotides": "ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA", - "subtype_start": 8795, - "subtype_end": 9415, - "subtype_aminoacids": "MGGKWSKSSVIGWPTVRERMRRAEPAADRVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCYKLVPVEPDKIEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*", - "subtype_nucleotides": "ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA" - } - ] -} \ No newline at end of file +{} \ No newline at end of file